NumericShaper

/*
 * @(#)NumericShaper.java	1.10 03/12/19
 *
 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
 * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
 */

package java.awt.font;

/**
 * The <code>NumericShaper</code> class is used to convert Latin-1 (European) 
 * digits to other Unicode decimal digits.  Users of this class will
 * primarily be people who wish to present data using
 * national digit shapes, but find it more convenient to represent the
 * data internally using Latin-1 (European) digits.  This does not
 * interpret the deprecated numeric shape selector character (U+206E).
 * <p>
 * Instances of <code>NumericShaper</code> are typically applied 
 * as attributes to text with the
 * {@link TextAttribute#NUMERIC_SHAPING NUMERIC_SHAPING} attribute
 * of the <code>TextAttribute</code> class.  
 * For example, this code snippet causes a <code>TextLayout</code> to 
 * shape European digits to Arabic in an Arabic context:<br>
 * <blockquote><pre>
 * Map map = new HashMap();
 * map.put(TextAttribute.NUMERIC_SHAPING, 
 *     NumericShaper.getContextualShaper(NumericShaper.ARABIC));
 * FontRenderContext frc = ...;
 * TextLayout layout = new TextLayout(text, map, frc);
 * layout.draw(g2d, x, y);
 * </pre></blockquote>
 * <br>
 * It is also possible to perform numeric shaping explicitly using instances 
 * of <code>NumericShaper</code>, as this code snippet demonstrates:<br>
 * <blockquote><pre>
 *   char[] text = ...;
 *   // shape all EUROPEAN digits (except zero) to ARABIC digits
 *   NumericShaper shaper = NumericShaper.getShaper(NumericShaper.ARABIC);
 *   shaper.shape(text, start, count);
 *
 *   // shape European digits to ARABIC digits if preceeding text is Arabic, or
 *   // shape European digits to TAMIL digits if preceeding text is Tamil, or
 *   // leave European digits alone if there is no preceeding text, or 
 *   // preceeding text is neither Arabic nor Tamil
 *   NumericShaper shaper = 
 *      NumericShaper.getContextualShaper(NumericShaper.ARABIC | 
 *                                              NumericShaper.TAMIL,
 *                                              NumericShaper.EUROPEAN);
 *   shaper.shape(text. start, count);
 * </pre></blockquote>
 *
 * @since 1.4
 */

public final class NumericShaper implements java.io.Serializable {
    /** index of context for contextual shaping - values range from 0 to 18 */
    private int key;

    /** flag indicating whether to shape contextually (high bit) and which 
     *  digit ranges to shape (bits 0-18) 
     */
    private int mask;

    /** Identifies the Latin-1 (European) and extended range, and 
     *  Latin-1 (European) decimal base. 
     */
    public static final int EUROPEAN = 1<<0;

    /** Identifies the ARABIC range and decimal base. */
    public static final int ARABIC = 1<<1;

    /** Identifies the ARABIC range and ARABIC_EXTENDED decimal base. */
    public static final int EASTERN_ARABIC = 1<<2;

    /** Identifies the DEVANAGARI range and decimal base. */
    public static final int DEVANAGARI = 1<<3;

    /** Identifies the BENGALI range and decimal base. */
    public static final int BENGALI = 1<<4;

    /** Identifies the GURMUKHI range and decimal base. */
    public static final int GURMUKHI = 1<<5;

    /** Identifies the GUJARATI range and decimal base. */
    public static final int GUJARATI = 1<<6;

    /** Identifies the ORIYA range and decimal base. */
    public static final int ORIYA = 1<<7;

    /** Identifies the TAMIL range and decimal base. Tamil does not have a 
     *  decimal digit 0 so Latin-1 (European) 0 is used. 
     */
    public static final int TAMIL = 1<<8;

    /** Identifies the TELUGU range and decimal base. */
    public static final int TELUGU = 1<<9;

    /** Identifies the KANNADA range and decimal base. */
    public static final int KANNADA = 1<<10;

    /** Identifies the MALAYALAM range and decimal base. */
    public static final int MALAYALAM = 1<<11;

    /** Identifies the THAI range and decimal base. */
    public static final int THAI = 1<<12;

    /** Identifies the LAO range and decimal base. */
    public static final int LAO = 1<<13;

    /** Identifies the TIBETAN range and decimal base. */
    public static final int TIBETAN = 1<<14;

    /** Identifies the MYANMAR range and decimal base. */
    public static final int MYANMAR = 1<<15;

    /** Identifies the ETHIOPIC range and decimal base. */
    public static final int ETHIOPIC = 1<<16;

    /** Identifies the KHMER range and decimal base. */
    public static final int KHMER = 1<<17;

    /** Identifies the MONGOLIAN range and decimal base. */
    public static final int MONGOLIAN = 1<<18;

    /** Identifies all ranges, for full contextual shaping. */
    public static final int ALL_RANGES = 0x0007ffff;

    private static final int EUROPEAN_KEY = 0;
    private static final int ARABIC_KEY = 1;
    private static final int EASTERN_ARABIC_KEY = 2;
    private static final int DEVANAGARI_KEY = 3;
    private static final int BENGALI_KEY = 4;
    private static final int GURMUKHI_KEY = 5;
    private static final int GUJARATI_KEY = 6;
    private static final int ORIYA_KEY = 7;
    private static final int TAMIL_KEY = 8;
    private static final int TELUGU_KEY = 9;
    private static final int KANNADA_KEY = 10;
    private static final int MALAYALAM_KEY = 11;
    private static final int THAI_KEY = 12;
    private static final int LAO_KEY = 13;
    private static final int TIBETAN_KEY = 14;
    private static final int MYANMAR_KEY = 15;
    private static final int ETHIOPIC_KEY = 16;
    private static final int KHMER_KEY = 17;
    private static final int MONGOLIAN_KEY = 18;

    private static final int NUM_KEYS = 19;

    private static final String[] keyNames = {
	"EUROPEAN",
	"ARABIC",
	"EASTERN_ARABIC",
	"DEVANAGARI",
	"BENGALI",
	"GURMUKHI",
	"GUJARATI",
	"ORIYA",
	"TAMIL",
	"TELUGU",
	"KANNADA",
	"MALAYALAM",
	"THAI",
	"LAO",
	"TIBETAN",
	"MYANMAR",
	"ETHIOPIC",
	"KHMER",
	"MONGOLIAN"
    };

    private static final int CONTEXTUAL_MASK = 1<<31;

    private static final char[] bases = {
	'\u0030' - '\u0030', // EUROPEAN
	'\u0660' - '\u0030', // ARABIC
	'\u06f0' - '\u0030', // EASTERN_ARABIC
	'\u0966' - '\u0030', // DEVANAGARI
	'\u09e6' - '\u0030', // BENGALI
	'\u0a66' - '\u0030', // GURMUKHI
	'\u0ae6' - '\u0030', // GUJARATI
	'\u0b66' - '\u0030', // ORIYA
	'\u0be7' - '\u0030', // TAMIL - note missing zero
	'\u0c66' - '\u0030', // TELUGU
	'\u0ce6' - '\u0030', // KANNADA
	'\u0d66' - '\u0030', // MALAYALAM
	'\u0e50' - '\u0030', // THAI
	'\u0ed0' - '\u0030', // LAO
	'\u0f20' - '\u0030', // TIBETAN
	'\u1040' - '\u0030', // MYANMAR
	'\u1369' - '\u0030', // ETHIOPIC
	'\u17e0' - '\u0030', // KHMER
	'\u1810' - '\u0030', // MONGOLIAN
    };

    // some ranges adjoin or overlap, rethink if we want to do a binary search on this

    private static final char[] contexts = {
	'\u0000', '\u0300', // 'EUROPEAN' (really latin-1 and extended)
	'\u0600', '\u0700', // ARABIC
	'\u0600', '\u0700', // EASTERN_ARABIC -- note overlap with arabic
	'\u0900', '\u0980', // DEVANAGARI
	'\u0980', '\u0a00', // BENGALI
	'\u0a00', '\u0a80', // GURMUKHI
	'\u0a80', '\u0b00', // GUJARATI
	'\u0b00', '\u0b80', // ORIYA
	'\u0b80', '\u0c00', // TAMIL - note missing zero
	'\u0c00', '\u0c80', // TELUGU
	'\u0c80', '\u0d00', // KANNADA
	'\u0d00', '\u0d80', // MALAYALAM
	'\u0e00', '\u0e80', // THAI
	'\u0e80', '\u0f00', // LAO
	'\u0f00', '\u1000', // TIBETAN
	'\u1000', '\u1080', // MYANMAR
	'\u1200', '\u1380', // ETHIOPIC
	'\u1780', '\u1800', // KHMER
	'\u1800', '\u1900', // MONGOLIAN
	'\uffff',
    };

    // assume most characters are near each other so probing the cache is infrequent,
    // and a linear probe is ok.

    private static int ctCache = 0;
    private static int ctCacheLimit = contexts.length - 2;

    // warning, synchronize access to this as it modifies state
    private static int getContextKey(char c) {
	if (c < contexts[ctCache]) {
	    while (ctCache > 0 && c < contexts[ctCache]) --ctCache;
	} else if (c >= contexts[ctCache + 1]) {
	    while (ctCache < ctCacheLimit && c >= contexts[ctCache + 1]) ++ctCache;
	}
	
	// if we're not in a known range, then return EUROPEAN as the range key
	return (ctCache & 0x1) == 0 ? (ctCache / 2) : EUROPEAN_KEY;
    }

    /*
     * A range table of strong directional characters (types L, R, AL).
     * Even (left) indexes are starts of ranges of non-strong-directional (or undefined)
     * characters, odd (right) indexes are starts of ranges of strong directional
     * characters.
     */
    private static char[] strongTable = {
	'\u0000', '\u0041',
	'\u005b', '\u0061',
	'\u007b', '\u00aa',
	'\u00ab', '\u00b5',
	'\u00b6', '\u00ba',
	'\u00bb', '\u00c0',
	'\u00d7', '\u00d8',
	'\u00f7', '\u00f8',
	'\u0220', '\u0222',
	'\u0234', '\u0250',
	'\u02ae', '\u02b0',
	'\u02b9', '\u02bb',
	'\u02c2', '\u02d0',
	'\u02d2', '\u02e0',
	'\u02e5', '\u02ee',
	'\u02ef', '\u037a',
	'\u037b', '\u0386',
	'\u0387', '\u0388',
	'\u038b', '\u038c',
	'\u038d', '\u038e',
	'\u03a2', '\u03a3',
	'\u03cf', '\u03d0',
	'\u03d8', '\u03da',
	'\u03f4', '\u0400',
	'\u0483', '\u048c',
	'\u04c5', '\u04c7',
	'\u04c9', '\u04cb',
	'\u04cd', '\u04d0',
	'\u04f6', '\u04f8',
	'\u04fa', '\u0531',
	'\u0557', '\u0559',
	'\u0560', '\u0561',
	'\u0588', '\u0589',
	'\u058a', '\u05be',
	'\u05bf', '\u05c0',
	'\u05c1', '\u05c3',
	'\u05c4', '\u05d0',
	'\u05eb', '\u05f0',
	'\u05f5', '\u061b',
	'\u061c', '\u061f',
	'\u0620', '\u0621',
	'\u063b', '\u0640',
	'\u064b', '\u066d',
	'\u066e', '\u0671',
	'\u06d6', '\u06e5',
	'\u06e7', '\u06fa',
	'\u06ff', '\u0700',
	'\u070e', '\u0710',
	'\u0711', '\u0712',
	'\u072d', '\u0780',
	'\u07a6', '\u0903',
	'\u0904', '\u0905',
	'\u093a', '\u093d',
	'\u0941', '\u0949',
 	'\u094d', '\u0950',
	'\u0951', '\u0958',
	'\u0962', '\u0964',
	'\u0971', '\u0982',
	'\u0984', '\u0985',
	'\u098d', '\u098f',
	'\u0991', '\u0993',
	'\u09a9', '\u09aa',
	'\u09b1', '\u09b2',
	'\u09b3', '\u09b6',
	'\u09ba', '\u09be',
	'\u09c1', '\u09c7',
	'\u09c9', '\u09cb',
	'\u09cd', '\u09d7',
	'\u09d8', '\u09dc',
	'\u09de', '\u09df',
	'\u09e2', '\u09e6',
	'\u09f2', '\u09f4',
	'\u09fb', '\u0a05',
	'\u0a0b', '\u0a0f',
	'\u0a11', '\u0a13',
	'\u0a29', '\u0a2a',
	'\u0a31', '\u0a32',
	'\u0a34', '\u0a35',
	'\u0a37', '\u0a38',
	'\u0a3a', '\u0a3e',
	'\u0a41', '\u0a59',
	'\u0a5d', '\u0a5e',
	'\u0a5f', '\u0a66',
	'\u0a70', '\u0a72',
	'\u0a75', '\u0a83',
	'\u0a84', '\u0a85',
	'\u0a8c', '\u0a8d',
	'\u0a8e', '\u0a8f',
	'\u0a92', '\u0a93',
	'\u0aa9', '\u0aaa',
	'\u0ab1', '\u0ab2',
	'\u0ab4', '\u0ab5',
	'\u0aba', '\u0abd',
	'\u0ac1', '\u0ac9',
	'\u0aca', '\u0acb',
	'\u0acd', '\u0ad0',
	'\u0ad1', '\u0ae0',
	'\u0ae1', '\u0ae6',
	'\u0af0', '\u0b02',
	'\u0b04', '\u0b05',
	'\u0b0d', '\u0b0f',
	'\u0b11', '\u0b13',
	'\u0b29', '\u0b2a',
	'\u0b31', '\u0b32',
	'\u0b34', '\u0b36',
	'\u0b3a', '\u0b3d',
	'\u0b3f', '\u0b40',
	'\u0b41', '\u0b47',
	'\u0b49', '\u0b4b',
	'\u0b4d', '\u0b57',
	'\u0b58', '\u0b5c',
	'\u0b5e', '\u0b5f',
	'\u0b62', '\u0b66',
	'\u0b71', '\u0b83',
	'\u0b84', '\u0b85',
	'\u0b8b', '\u0b8e',
	'\u0b91', '\u0b92',
	'\u0b96', '\u0b99',
	'\u0b9b', '\u0b9c',
	'\u0b9d', '\u0b9e',
	'\u0ba0', '\u0ba3',
	'\u0ba5', '\u0ba8',
	'\u0bab', '\u0bae',
	'\u0bb6', '\u0bb7',
	'\u0bba', '\u0bbe',
	'\u0bc0', '\u0bc1',
	'\u0bc3', '\u0bc6',
	'\u0bc9', '\u0bca',
	'\u0bcd', '\u0bd7',
	'\u0bd8', '\u0be7',
	'\u0bf3', '\u0c01',
	'\u0c04', '\u0c05',
	'\u0c0d', '\u0c0e',
	'\u0c11', '\u0c12',
	'\u0c29', '\u0c2a',
	'\u0c34', '\u0c35',
	'\u0c3a', '\u0c41',
	'\u0c45', '\u0c60',
	'\u0c62', '\u0c66',
	'\u0c70', '\u0c82',
	'\u0c84', '\u0c85',
	'\u0c8d', '\u0c8e',
	'\u0c91', '\u0c92',
	'\u0ca9', '\u0caa',
	'\u0cb4', '\u0cb5',
	'\u0cba', '\u0cbe',
	'\u0cbf', '\u0cc0',
	'\u0cc5', '\u0cc7',
	'\u0cc9', '\u0cca',
	'\u0ccc', '\u0cd5',
	'\u0cd7', '\u0cde',
	'\u0cdf', '\u0ce0',
	'\u0ce2', '\u0ce6',
	'\u0cf0', '\u0d02',
	'\u0d04', '\u0d05',
	'\u0d0d', '\u0d0e',
	'\u0d11', '\u0d12',
	'\u0d29', '\u0d2a',
	'\u0d3a', '\u0d3e',
	'\u0d41', '\u0d46',
	'\u0d49', '\u0d4a',
	'\u0d4d', '\u0d57',
	'\u0d58', '\u0d60',
	'\u0d62', '\u0d66',
	'\u0d70', '\u0d82',
	'\u0d84', '\u0d85',
	'\u0d97', '\u0d9a',
	'\u0db2', '\u0db3',
	'\u0dbc', '\u0dbd',
	'\u0dbe', '\u0dc0',
	'\u0dc7', '\u0dcf',
	'\u0dd2', '\u0dd8',
	'\u0de0', '\u0df2',
	'\u0df5', '\u0e01',
	'\u0e31', '\u0e32',
	'\u0e34', '\u0e40',
	'\u0e47', '\u0e4f',
	'\u0e5c', '\u0e81',
	'\u0e83', '\u0e84',
	'\u0e85', '\u0e87',
	'\u0e89', '\u0e8a',
	'\u0e8b', '\u0e8d',
	'\u0e8e', '\u0e94',
	'\u0e98', '\u0e99',
	'\u0ea0', '\u0ea1',
	'\u0ea4', '\u0ea5',
	'\u0ea6', '\u0ea7',
	'\u0ea8', '\u0eaa',
	'\u0eac', '\u0ead',
	'\u0eb1', '\u0eb2',
	'\u0eb4', '\u0ebd',
	'\u0ebe', '\u0ec0',
	'\u0ec5', '\u0ec6',
	'\u0ec7', '\u0ed0',
	'\u0eda', '\u0edc',
	'\u0ede', '\u0f00',
	'\u0f18', '\u0f1a',
	'\u0f35', '\u0f36',
	'\u0f37', '\u0f38',
	'\u0f39', '\u0f3e',
	'\u0f48', '\u0f49',
	'\u0f6b', '\u0f7f',
	'\u0f80', '\u0f85',
	'\u0f86', '\u0f88',
	'\u0f8c', '\u0fbe',
	'\u0fc6', '\u0fc7',
	'\u0fcd', '\u0fcf',
	'\u0fd0', '\u1000',
	'\u1022', '\u1023',
	'\u1028', '\u1029',
	'\u102b', '\u102c',
	'\u102d', '\u1031',
	'\u1032', '\u1038',
	'\u1039', '\u1040',
	'\u1058', '\u10a0',
	'\u10c6', '\u10d0',
	'\u10f7', '\u10fb',
	'\u10fc', '\u1100',
	'\u115a', '\u115f',
	'\u11a3', '\u11a8',
	'\u11fa', '\u1200',
	'\u1207', '\u1208',
	'\u1247', '\u1248',
	'\u1249', '\u124a',
	'\u124e', '\u1250',
	'\u1257', '\u1258',
	'\u1259', '\u125a',
	'\u125e', '\u1260',
	'\u1287', '\u1288',
	'\u1289', '\u128a',
	'\u128e', '\u1290',
	'\u12af', '\u12b0',
	'\u12b1', '\u12b2',
	'\u12b6', '\u12b8',
	'\u12bf', '\u12c0',
	'\u12c1', '\u12c2',
	'\u12c6', '\u12c8',
	'\u12cf', '\u12d0',
	'\u12d7', '\u12d8',
	'\u12ef', '\u12f0',
	'\u130f', '\u1310',
	'\u1311', '\u1312',
	'\u1316', '\u1318',
	'\u131f', '\u1320',
	'\u1347', '\u1348',
	'\u135b', '\u1361',
	'\u137d', '\u13a0',
	'\u13f5', '\u1401',
	'\u1677', '\u1681',
	'\u169b', '\u16a0',
	'\u16f1', '\u1780',
	'\u17b7', '\u17be',
	'\u17c6', '\u17c7',
	'\u17c9', '\u17d4',
	'\u17db', '\u17dc',
	'\u17dd', '\u17e0',
	'\u17ea', '\u1810',
	'\u181a', '\u1820',
	'\u1878', '\u1880',
	'\u18a9', '\u1e00',
	'\u1e9c', '\u1ea0',
	'\u1efa', '\u1f00',
	'\u1f16', '\u1f18',
	'\u1f1e', '\u1f20',
	'\u1f46', '\u1f48',
	'\u1f4e', '\u1f50',
	'\u1f58', '\u1f59',
	'\u1f5a', '\u1f5b',
	'\u1f5c', '\u1f5d',
	'\u1f5e', '\u1f5f',
	'\u1f7e', '\u1f80',
	'\u1fb5', '\u1fb6',
	'\u1fbd', '\u1fbe',
	'\u1fbf', '\u1fc2',
	'\u1fc5', '\u1fc6',
	'\u1fcd', '\u1fd0',
	'\u1fd4', '\u1fd6',
	'\u1fdc', '\u1fe0',
	'\u1fed', '\u1ff2',
	'\u1ff5', '\u1ff6',
	'\u1ffd', '\u200e',
	'\u2010', '\u207f',
	'\u2080', '\u2102',
	'\u2103', '\u2107',
	'\u2108', '\u210a',
	'\u2114', '\u2115',
	'\u2116', '\u2119',
	'\u211e', '\u2124',
	'\u2125', '\u2126',
	'\u2127', '\u2128',
	'\u2129', '\u212a',
	'\u212e', '\u212f',
	'\u2132', '\u2133',
	'\u213a', '\u2160',
	'\u2184', '\u2336',
	'\u237b', '\u2395',
	'\u2396', '\u249c',
	'\u24ea', '\u3005',
	'\u3008', '\u3021',
	'\u302a', '\u3031',
	'\u3036', '\u3038',
	'\u303b', '\u3041',
	'\u3095', '\u309d',
	'\u309f', '\u30a1',
	'\u30fb', '\u30fc',
	'\u30ff', '\u3105',
	'\u312d', '\u3131',
	'\u318f', '\u3190',
	'\u31b8', '\u3200',
	'\u321d', '\u3220',
	'\u3244', '\u3260',
	'\u327c', '\u327f',
	'\u32b1', '\u32c0',
	'\u32cc', '\u32d0',
	'\u32ff', '\u3300',
	'\u3377', '\u337b',
	'\u33de', '\u33e0',
	'\u33ff', '\u3400',
	'\u4db6', '\u4e00',
	'\u9fa6', '\ua000',
	'\ua48d', '\uac00',
	'\ud7a4', '\uf900',
	'\ufa2e', '\ufb00',
	'\ufb07', '\ufb13',
	'\ufb18', '\ufb1d',
	'\ufb1e', '\ufb1f',
	'\ufb29', '\ufb2a',
	'\ufb37', '\ufb38',
	'\ufb3d', '\ufb3e',
	'\ufb3f', '\ufb40',
	'\ufb42', '\ufb43',
	'\ufb45', '\ufb46',
	'\ufbb2', '\ufbd3',
	'\ufd3e', '\ufd50',
	'\ufd90', '\ufd92',
	'\ufdc8', '\ufdf0',
	'\ufdfc', '\ufe70',
	'\ufe73', '\ufe74',
	'\ufe75', '\ufe76',
	'\ufefd', '\uff21',
	'\uff3b', '\uff41',
	'\uff5b', '\uff66',
	'\uffbf', '\uffc2',
	'\uffc8', '\uffca',
	'\uffd0', '\uffd2',
	'\uffd8', '\uffda',
	'\uffdd', '\uffff' // last entry is sentinel, actually never checked
    };


    // use a binary search with a cache

    private static int stCache = 0;

    // warning, synchronize access to this as it modifies state
    private static boolean isStrongDirectional(char c) {
	if (c < strongTable[stCache]) {
	    stCache = search(c, strongTable, 0, stCache);
	} else if (c >= strongTable[stCache + 1]) {
	    stCache = search(c, strongTable, stCache + 1, strongTable.length - stCache - 1);
	}
	return (stCache & 0x1) == 1;
    }

    static private int getKeyFromMask(int mask) {
	int key = 0;
	while (key < NUM_KEYS && ((mask & (1<<key)) == 0)) {
	    ++key;
	}
	if (key == NUM_KEYS || ((mask & ~(1<<key)) != 0)) {
	    throw new IllegalArgumentException("invalid shaper: " + Integer.toHexString(mask));
	}
	return key;
    }

    /**
     * Returns a shaper for the provided unicode range.  All 
     * Latin-1 (EUROPEAN) digits are converted 
     * to the corresponding decimal unicode digits.
     * @param singleRange the specified Unicode range
     * @return a non-contextual numeric shaper 
     * @throws IllegalArgumentException if the range is not a single range
     */
    static public NumericShaper getShaper(int singleRange) {
	int key = getKeyFromMask(singleRange);
	return new NumericShaper(key, singleRange);
    }
    
    /**
     * Returns a contextual shaper for the provided unicode range(s).  
     * Latin-1 (EUROPEAN) digits are converted to the decimal digits 
     * corresponding to the range of the preceeding text, if the
     * range is one of the provided ranges.  Multiple ranges are 
     * represented by or-ing the values together, such as, 
     * <code>NumericShaper.ARABIC | NumericShaper.THAI</code>.  The
     * shaper assumes EUROPEAN as the starting context, that is, if 
     * EUROPEAN digits are encountered before any strong directional 
     * text in the string, the context is presumed to be EUROPEAN, and 
     * so the digits will not shape.
     * @param ranges the specified Unicode ranges
     * @return a shaper for the specified ranges
     */
    static public NumericShaper getContextualShaper(int ranges) {
	ranges |= CONTEXTUAL_MASK;
	return new NumericShaper(EUROPEAN_KEY, ranges);
    }

    /**
     * Returns a contextual shaper for the provided unicode range(s).  
     * Latin-1 (EUROPEAN) digits will be converted to the decimal digits 
     * corresponding to the range of the preceeding text, if the
     * range is one of the provided ranges.  Multiple ranges are 
     * represented by or-ing the values together, for example, 
     * <code>NumericShaper.ARABIC | NumericShaper.THAI</code>.  The
     * shaper uses defaultContext as the starting context.
     * @param ranges the specified Unicode ranges
     * @param defaultContext the starting context, such as 
     * <code>NumericShaper.EUROPEAN</code>
     * @return a shaper for the specified Unicode ranges.
     */
    static public NumericShaper getContextualShaper(int ranges, int defaultContext) {
	int key = getKeyFromMask(defaultContext);
	ranges |= CONTEXTUAL_MASK;
	return new NumericShaper(key, ranges);
    }

    /**
     * Private constructor.
     */
    private NumericShaper(int key, int mask) {
	this.key = key;
	this.mask = mask;
    }

    /**
     * Converts the digits in the text that occur between start and 
     * start + count.
     * @param text an array of characters to convert
     * @param start the index into <code>text</code> to start 
     *        converting
     * @param count the number of characters in <code>text</code>
     *        to convert
     */
    public void shape(char[] text, int start, int count) {
	if (isContextual()) {
	    shapeContextually(text, start, count, key);
	} else {
	    shapeNonContextually(text, start, count);
	}
    }

    /**
     * Converts the digits in the text that occur between start and 
     * start + count, using the provided context.
     * Context is ignored if the shaper is not a contextual shaper.
     * @param text an array of characters
     * @param start the index into <code>text</code> to start
     *        converting
     * @param count the number of characters in <code>text</code>
     *        to convert
     * @param context the context to which to convert the 
     *        characters, such as <code>NumericShaper.EUROPEAN</code>
     */
    public void shape(char[] text, int start, int count, int context) {
	if (isContextual()) {
	    int ctxKey = getKeyFromMask(context);
	    shapeContextually(text, start, count, ctxKey);
	} else {
	    shapeNonContextually(text, start, count);
	}
    }

    /**
     * Returns a <code>boolean</code> indicating whether or not
     * this shaper shapes contextually.
     * @return <code>true</code> if this shaper is contextual; 
     *         <code>false</code> otherwise.
     */
    public boolean isContextual() {
	return (mask & CONTEXTUAL_MASK) != 0;
    }

    /**
     * Returns an <code>int</code> that ORs together the values for
     * all the ranges that will be shaped.
     * <p>
     * For example, to check if a shaper shapes to Arabic, you would use the 
     * following:
     * <blockquote>
     *   <code>if ((shaper.getRanges() & shaper.ARABIC) != 0) { ... </code>
     * </blockquote>
     * @return the values for all the ranges to be shaped.
     */
    public int getRanges() {
	return mask & ~CONTEXTUAL_MASK;
    }

    /**
     * Perform non-contextual shaping.
     */
    private void shapeNonContextually(char[] text, int start, int count) {
	int base = bases[key];
	char minDigit = key == TAMIL_KEY ? '\u0031' : '\u0030'; // Tamil doesn't use decimal zero
	for (int i = start, e = start + count; i < e; ++i) {
	    char c = text[i];
	    if (c >= minDigit && c <= '\u0039') {
		text[i] = (char)(c + base);
	    }
	}
    }

    /**
     * Perform contextual shaping.
     * Synchronized to protect caches used in getContextKey and isStrongDirectional.
     */
    private synchronized void shapeContextually(char[] text, int start, int count, int ctxKey) {

	// if we don't support this context, then don't shape
	if ((mask & (1<<ctxKey)) == 0) {
	    ctxKey = EUROPEAN_KEY;
	}
	int lastkey = ctxKey;

	int base = bases[ctxKey];
	char minDigit = ctxKey == TAMIL_KEY ? '\u0031' : '\u0030'; // Tamil doesn't use decimal zero

	for (int i = start, e = start + count; i < e; ++i) {
	    char c = text[i];
	    if (c >= minDigit && c <= '\u0039') {
		text[i] = (char)(c + base);
	    }

	    if (isStrongDirectional(c)) {
		int newkey = getContextKey(c);
		if (newkey != lastkey) {
		    lastkey = newkey;

		    ctxKey = newkey;
		    if (((mask & EASTERN_ARABIC) != 0) && (ctxKey == ARABIC_KEY || ctxKey == EASTERN_ARABIC_KEY)) {
			ctxKey = EASTERN_ARABIC_KEY;
		    } else if ((mask & (1<<ctxKey)) == 0) { 
			ctxKey = EUROPEAN_KEY;
		    }

		    base = bases[ctxKey];

		    minDigit = ctxKey == TAMIL_KEY ? '\u0031' : '\u0030'; // Tamil doesn't use decimal zero
		}
	    }
	}
    }

    /**
     * Returns a hash code for this shaper.
     * @return this shaper's hash code.
     * @see java.lang.Object#hashCode
     */
    public int hashCode() {
	return mask;
    }

    /**
     * Returns true if the specified object is an instance of 
     * <code>NumericShaper</code> and shapes identically to this one.
     * @param o the specified object to compare to this
     *          <code>NumericShaper</code>
     * @return <code>true</code> if <code>o</code> is an instance
     *         of <code>NumericShaper</code> and shapes in the same way;
     *         <code>false</code> otherwise.
     * @see java.lang.Object#equals(java.lang.Object)
     */
    public boolean equals(Object o) {
	if (o != null) {
	    try {
		NumericShaper rhs = (NumericShaper)o;
		return rhs.mask == mask && rhs.key == key;
	    }
	    catch (ClassCastException e) {
	    }
	}
	return false;
    }

    /**
     * Returns a <code>String</code> that describes this shaper. This method 
     * is used for debugging purposes only.
     * @return a <code>String</code> describing this shaper.
     */
    public String toString() {
	StringBuffer buf = new StringBuffer(super.toString());

	buf.append("[contextual:" + isContextual());

	if (isContextual()) {
	    buf.append(", context:" + keyNames[key]);
	}

	buf.append(", range(s): ");
	boolean first = true;
	for (int i = 0; i < NUM_KEYS; ++i) {
	    if ((mask & (1 << i)) != 0) {
		if (first) {
		    first = false;
		} else {
		    buf.append(", ");
		}
		buf.append(keyNames[i]);
	    }
	}
	buf.append(']');

	return buf.toString();
    }

    /**
     * Returns the index of the high bit in value (assuming le, actually 
     * power of 2 >= value). value must be positive.
     */
    private static int getHighBit(int value) { 
	if (value <= 0) {
	    return -32; 
	}

	int bit = 0;

	if (value >= 1 << 16) {
	    value >>= 16;
	    bit += 16;
	}

	if (value >= 1 << 8) {
	    value >>= 8;
	    bit += 8;
	}

	if (value >= 1 << 4) {
	    value >>= 4;
	    bit += 4;
	}

	if (value >= 1 << 2) {
	    value >>= 2;
	    bit += 2;
	}

	if (value >= 1 << 1) {
	    value >>= 1;
	    bit += 1;
	}
	
	return bit;
    }

    /**
     * fast binary search over subrange of array.
     */
    private static int search(char value, char[] array, int start, int length) 
    {
	int power = 1 << getHighBit(length);
	int extra = length - power;
	int probe = power;
	int index = start;

	if (value >= array[index + extra]) {
	    index += extra;
	}

	while (probe > 1) {
	    probe >>= 1;

	    if (value >= array[index + probe]) {
		index += probe;
	    }
	}

	return index;
    }
}