1. /*
  2. * The Apache Software License, Version 1.1
  3. *
  4. *
  5. * Copyright (c) 1999-2003 The Apache Software Foundation. All rights
  6. * reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. *
  12. * 1. Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. *
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in
  17. * the documentation and/or other materials provided with the
  18. * distribution.
  19. *
  20. * 3. The end-user documentation included with the redistribution,
  21. * if any, must include the following acknowledgment:
  22. * "This product includes software developed by the
  23. * Apache Software Foundation (http://www.apache.org/)."
  24. * Alternately, this acknowledgment may appear in the software itself,
  25. * if and wherever such third-party acknowledgments normally appear.
  26. *
  27. * 4. The names "Xerces" and "Apache Software Foundation" must
  28. * not be used to endorse or promote products derived from this
  29. * software without prior written permission. For written
  30. * permission, please contact apache@apache.org.
  31. *
  32. * 5. Products derived from this software may not be called "Apache",
  33. * nor may "Apache" appear in their name, without prior written
  34. * permission of the Apache Software Foundation.
  35. *
  36. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  37. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  38. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  39. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  40. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  41. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  42. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  43. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  44. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  45. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  46. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  47. * SUCH DAMAGE.
  48. * ====================================================================
  49. *
  50. * This software consists of voluntary contributions made by many
  51. * individuals on behalf of the Apache Software Foundation and was
  52. * originally based on software copyright (c) 1999, International
  53. * Business Machines, Inc., http://www.apache.org. For more
  54. * information on the Apache Software Foundation, please see
  55. * <http://www.apache.org/>.
  56. */
  57. package com.sun.org.apache.xerces.internal.impl.xpath.regex;
  58. import java.util.Hashtable;
  59. import java.util.Locale;
  60. /**
  61. * A regular expression parser for the XML Shema.
  62. *
  63. * @author TAMURA Kent <kent@trl.ibm.co.jp>
  64. * @version $Id: ParserForXMLSchema.java,v 1.5 2003/03/24 23:37:55 sandygao Exp $
  65. */
  66. class ParserForXMLSchema extends RegexParser {
  67. public ParserForXMLSchema() {
  68. //this.setLocale(Locale.getDefault());
  69. }
  70. public ParserForXMLSchema(Locale locale) {
  71. //this.setLocale(locale);
  72. }
  73. Token processCaret() throws ParseException {
  74. this.next();
  75. return Token.createChar('^');
  76. }
  77. Token processDollar() throws ParseException {
  78. this.next();
  79. return Token.createChar('$');
  80. }
  81. Token processLookahead() throws ParseException {
  82. throw ex("parser.process.1", this.offset);
  83. }
  84. Token processNegativelookahead() throws ParseException {
  85. throw ex("parser.process.1", this.offset);
  86. }
  87. Token processLookbehind() throws ParseException {
  88. throw ex("parser.process.1", this.offset);
  89. }
  90. Token processNegativelookbehind() throws ParseException {
  91. throw ex("parser.process.1", this.offset);
  92. }
  93. Token processBacksolidus_A() throws ParseException {
  94. throw ex("parser.process.1", this.offset);
  95. }
  96. Token processBacksolidus_Z() throws ParseException {
  97. throw ex("parser.process.1", this.offset);
  98. }
  99. Token processBacksolidus_z() throws ParseException {
  100. throw ex("parser.process.1", this.offset);
  101. }
  102. Token processBacksolidus_b() throws ParseException {
  103. throw ex("parser.process.1", this.offset);
  104. }
  105. Token processBacksolidus_B() throws ParseException {
  106. throw ex("parser.process.1", this.offset);
  107. }
  108. Token processBacksolidus_lt() throws ParseException {
  109. throw ex("parser.process.1", this.offset);
  110. }
  111. Token processBacksolidus_gt() throws ParseException {
  112. throw ex("parser.process.1", this.offset);
  113. }
  114. Token processStar(Token tok) throws ParseException {
  115. this.next();
  116. return Token.createClosure(tok);
  117. }
  118. Token processPlus(Token tok) throws ParseException {
  119. // X+ -> XX*
  120. this.next();
  121. return Token.createConcat(tok, Token.createClosure(tok));
  122. }
  123. Token processQuestion(Token tok) throws ParseException {
  124. // X? -> X|
  125. this.next();
  126. Token par = Token.createUnion();
  127. par.addChild(tok);
  128. par.addChild(Token.createEmpty());
  129. return par;
  130. }
  131. boolean checkQuestion(int off) {
  132. return false;
  133. }
  134. Token processParen() throws ParseException {
  135. this.next();
  136. Token tok = Token.createParen(this.parseRegex(), 0);
  137. if (this.read() != super.T_RPAREN) throw ex("parser.factor.1", this.offset-1);
  138. this.next(); // Skips ')'
  139. return tok;
  140. }
  141. Token processParen2() throws ParseException {
  142. throw ex("parser.process.1", this.offset);
  143. }
  144. Token processCondition() throws ParseException {
  145. throw ex("parser.process.1", this.offset);
  146. }
  147. Token processModifiers() throws ParseException {
  148. throw ex("parser.process.1", this.offset);
  149. }
  150. Token processIndependent() throws ParseException {
  151. throw ex("parser.process.1", this.offset);
  152. }
  153. Token processBacksolidus_c() throws ParseException {
  154. this.next();
  155. return this.getTokenForShorthand('c');
  156. }
  157. Token processBacksolidus_C() throws ParseException {
  158. this.next();
  159. return this.getTokenForShorthand('C');
  160. }
  161. Token processBacksolidus_i() throws ParseException {
  162. this.next();
  163. return this.getTokenForShorthand('i');
  164. }
  165. Token processBacksolidus_I() throws ParseException {
  166. this.next();
  167. return this.getTokenForShorthand('I');
  168. }
  169. Token processBacksolidus_g() throws ParseException {
  170. throw this.ex("parser.process.1", this.offset-2);
  171. }
  172. Token processBacksolidus_X() throws ParseException {
  173. throw ex("parser.process.1", this.offset-2);
  174. }
  175. Token processBackreference() throws ParseException {
  176. throw ex("parser.process.1", this.offset-4);
  177. }
  178. int processCIinCharacterClass(RangeToken tok, int c) {
  179. tok.mergeRanges(this.getTokenForShorthand(c));
  180. return -1;
  181. }
  182. /**
  183. * Parses a character-class-expression, not a character-class-escape.
  184. *
  185. * c-c-expression ::= '[' c-group ']'
  186. * c-group ::= positive-c-group | negative-c-group | c-c-subtraction
  187. * positive-c-group ::= (c-range | c-c-escape)+
  188. * negative-c-group ::= '^' positive-c-group
  189. * c-c-subtraction ::= (positive-c-group | negative-c-group) subtraction
  190. * subtraction ::= '-' c-c-expression
  191. * c-range ::= single-range | from-to-range
  192. * single-range ::= multi-c-escape | category-c-escape | block-c-escape | <any XML char>
  193. * cc-normal-c ::= <any character except [, ], \>
  194. * from-to-range ::= cc-normal-c '-' cc-normal-c
  195. *
  196. * @param useNrage Ignored.
  197. * @return This returns no NrageToken.
  198. */
  199. protected RangeToken parseCharacterClass(boolean useNrange) throws ParseException {
  200. this.setContext(S_INBRACKETS);
  201. this.next(); // '['
  202. boolean nrange = false;
  203. RangeToken base = null;
  204. RangeToken tok;
  205. if (this.read() == T_CHAR && this.chardata == '^') {
  206. nrange = true;
  207. this.next(); // '^'
  208. base = Token.createRange();
  209. base.addRange(0, Token.UTF16_MAX);
  210. tok = Token.createRange();
  211. } else {
  212. tok = Token.createRange();
  213. }
  214. int type;
  215. boolean firstloop = true;
  216. while ((type = this.read()) != T_EOF) { // Don't use 'cotinue' for this loop.
  217. // single-range | from-to-range | subtraction
  218. if (type == T_CHAR && this.chardata == ']' && !firstloop) {
  219. if (nrange) {
  220. base.subtractRanges(tok);
  221. tok = base;
  222. }
  223. break;
  224. }
  225. int c = this.chardata;
  226. boolean end = false;
  227. if (type == T_BACKSOLIDUS) {
  228. switch (c) {
  229. case 'd': case 'D':
  230. case 'w': case 'W':
  231. case 's': case 'S':
  232. tok.mergeRanges(this.getTokenForShorthand(c));
  233. end = true;
  234. break;
  235. case 'i': case 'I':
  236. case 'c': case 'C':
  237. c = this.processCIinCharacterClass(tok, c);
  238. if (c < 0) end = true;
  239. break;
  240. case 'p':
  241. case 'P':
  242. int pstart = this.offset;
  243. RangeToken tok2 = this.processBacksolidus_pP(c);
  244. if (tok2 == null) throw this.ex("parser.atom.5", pstart);
  245. tok.mergeRanges(tok2);
  246. end = true;
  247. break;
  248. default:
  249. c = this.decodeEscaped();
  250. } // \ + c
  251. } // backsolidus
  252. else if (type == T_XMLSCHEMA_CC_SUBTRACTION && !firstloop) {
  253. // Subraction
  254. if (nrange) {
  255. base.subtractRanges(tok);
  256. tok = base;
  257. }
  258. RangeToken range2 = this.parseCharacterClass(false);
  259. tok.subtractRanges(range2);
  260. if (this.read() != T_CHAR || this.chardata != ']')
  261. throw this.ex("parser.cc.5", this.offset);
  262. break; // Exit this loop
  263. }
  264. this.next();
  265. if (!end) { // if not shorthands...
  266. if (type == T_CHAR) {
  267. if (c == '[') throw this.ex("parser.cc.6", this.offset-2);
  268. if (c == ']') throw this.ex("parser.cc.7", this.offset-2);
  269. if (c == '-') throw this.ex("parser.cc.8", this.offset-2);
  270. }
  271. if (this.read() != T_CHAR || this.chardata != '-') { // Here is no '-'.
  272. tok.addRange(c, c);
  273. } else { // Found '-'
  274. // Is this '-' is a from-to token??
  275. this.next(); // Skips '-'
  276. if ((type = this.read()) == T_EOF) throw this.ex("parser.cc.2", this.offset);
  277. // c '-' ']' -> '-' is a single-range.
  278. if ((type == T_CHAR && this.chardata == ']')
  279. || type == T_XMLSCHEMA_CC_SUBTRACTION) {
  280. throw this.ex("parser.cc.8", this.offset-1);
  281. } else {
  282. int rangeend = this.chardata;
  283. if (type == T_CHAR) {
  284. if (rangeend == '[') throw this.ex("parser.cc.6", this.offset-1);
  285. if (rangeend == ']') throw this.ex("parser.cc.7", this.offset-1);
  286. if (rangeend == '-') throw this.ex("parser.cc.8", this.offset-2);
  287. }
  288. else if (type == T_BACKSOLIDUS)
  289. rangeend = this.decodeEscaped();
  290. this.next();
  291. if (c > rangeend) throw this.ex("parser.ope.3", this.offset-1);
  292. tok.addRange(c, rangeend);
  293. }
  294. }
  295. }
  296. firstloop = false;
  297. }
  298. if (this.read() == T_EOF)
  299. throw this.ex("parser.cc.2", this.offset);
  300. tok.sortRanges();
  301. tok.compactRanges();
  302. //tok.dumpRanges();
  303. this.setContext(S_NORMAL);
  304. this.next(); // Skips ']'
  305. return tok;
  306. }
  307. protected RangeToken parseSetOperations() throws ParseException {
  308. throw this.ex("parser.process.1", this.offset);
  309. }
  310. Token getTokenForShorthand(int ch) {
  311. switch (ch) {
  312. case 'd':
  313. return ParserForXMLSchema.getRange("xml:isDigit", true);
  314. case 'D':
  315. return ParserForXMLSchema.getRange("xml:isDigit", false);
  316. case 'w':
  317. return ParserForXMLSchema.getRange("xml:isWord", true);
  318. case 'W':
  319. return ParserForXMLSchema.getRange("xml:isWord", false);
  320. case 's':
  321. return ParserForXMLSchema.getRange("xml:isSpace", true);
  322. case 'S':
  323. return ParserForXMLSchema.getRange("xml:isSpace", false);
  324. case 'c':
  325. return ParserForXMLSchema.getRange("xml:isNameChar", true);
  326. case 'C':
  327. return ParserForXMLSchema.getRange("xml:isNameChar", false);
  328. case 'i':
  329. return ParserForXMLSchema.getRange("xml:isInitialNameChar", true);
  330. case 'I':
  331. return ParserForXMLSchema.getRange("xml:isInitialNameChar", false);
  332. default:
  333. throw new RuntimeException("Internal Error: shorthands: \\u"+Integer.toString(ch, 16));
  334. }
  335. }
  336. int decodeEscaped() throws ParseException {
  337. if (this.read() != T_BACKSOLIDUS) throw ex("parser.next.1", this.offset-1);
  338. int c = this.chardata;
  339. switch (c) {
  340. case 'n': c = '\n'; break; // LINE FEED U+000A
  341. case 'r': c = '\r'; break; // CRRIAGE RETURN U+000D
  342. case 't': c = '\t'; break; // HORIZONTAL TABULATION U+0009
  343. case '\\':
  344. case '|':
  345. case '.':
  346. case '^':
  347. case '-':
  348. case '?':
  349. case '*':
  350. case '+':
  351. case '{':
  352. case '}':
  353. case '(':
  354. case ')':
  355. case '[':
  356. case ']':
  357. break; // return actucal char
  358. default:
  359. throw ex("parser.process.1", this.offset-2);
  360. }
  361. return c;
  362. }
  363. static private Hashtable ranges = null;
  364. static private Hashtable ranges2 = null;
  365. static synchronized protected RangeToken getRange(String name, boolean positive) {
  366. if (ranges == null) {
  367. ranges = new Hashtable();
  368. ranges2 = new Hashtable();
  369. Token tok = Token.createRange();
  370. setupRange(tok, SPACES);
  371. ranges.put("xml:isSpace", tok);
  372. ranges2.put("xml:isSpace", Token.complementRanges(tok));
  373. tok = Token.createRange();
  374. setupRange(tok, DIGITS);
  375. ranges.put("xml:isDigit", tok);
  376. ranges2.put("xml:isDigit", Token.complementRanges(tok));
  377. tok = Token.createRange();
  378. setupRange(tok, DIGITS);
  379. ranges.put("xml:isDigit", tok);
  380. ranges2.put("xml:isDigit", Token.complementRanges(tok));
  381. tok = Token.createRange();
  382. setupRange(tok, LETTERS);
  383. tok.mergeRanges((Token)ranges.get("xml:isDigit"));
  384. ranges.put("xml:isWord", tok);
  385. ranges2.put("xml:isWord", Token.complementRanges(tok));
  386. tok = Token.createRange();
  387. setupRange(tok, NAMECHARS);
  388. ranges.put("xml:isNameChar", tok);
  389. ranges2.put("xml:isNameChar", Token.complementRanges(tok));
  390. tok = Token.createRange();
  391. setupRange(tok, LETTERS);
  392. tok.addRange('_', '_');
  393. tok.addRange(':', ':');
  394. ranges.put("xml:isInitialNameChar", tok);
  395. ranges2.put("xml:isInitialNameChar", Token.complementRanges(tok));
  396. }
  397. RangeToken tok = positive ? (RangeToken)ranges.get(name)
  398. : (RangeToken)ranges2.get(name);
  399. return tok;
  400. }
  401. static void setupRange(Token range, String src) {
  402. int len = src.length();
  403. for (int i = 0; i < len; i += 2)
  404. range.addRange(src.charAt(i), src.charAt(i+1));
  405. }
  406. private static final String SPACES = "\t\n\r\r ";
  407. private static final String NAMECHARS =
  408. "\u002d\u002e\u0030\u003a\u0041\u005a\u005f\u005f\u0061\u007a\u00b7\u00b7\u00c0\u00d6"
  409. +"\u00d8\u00f6\u00f8\u0131\u0134\u013e\u0141\u0148\u014a\u017e\u0180\u01c3\u01cd\u01f0"
  410. +"\u01f4\u01f5\u01fa\u0217\u0250\u02a8\u02bb\u02c1\u02d0\u02d1\u0300\u0345\u0360\u0361"
  411. +"\u0386\u038a\u038c\u038c\u038e\u03a1\u03a3\u03ce\u03d0\u03d6\u03da\u03da\u03dc\u03dc"
  412. +"\u03de\u03de\u03e0\u03e0\u03e2\u03f3\u0401\u040c\u040e\u044f\u0451\u045c\u045e\u0481"
  413. +"\u0483\u0486\u0490\u04c4\u04c7\u04c8\u04cb\u04cc\u04d0\u04eb\u04ee\u04f5\u04f8\u04f9"
  414. +"\u0531\u0556\u0559\u0559\u0561\u0586\u0591\u05a1\u05a3\u05b9\u05bb\u05bd\u05bf\u05bf"
  415. +"\u05c1\u05c2\u05c4\u05c4\u05d0\u05ea\u05f0\u05f2\u0621\u063a\u0640\u0652\u0660\u0669"
  416. +"\u0670\u06b7\u06ba\u06be\u06c0\u06ce\u06d0\u06d3\u06d5\u06e8\u06ea\u06ed\u06f0\u06f9"
  417. +"\u0901\u0903\u0905\u0939\u093c\u094d\u0951\u0954\u0958\u0963\u0966\u096f\u0981\u0983"
  418. +"\u0985\u098c\u098f\u0990\u0993\u09a8\u09aa\u09b0\u09b2\u09b2\u09b6\u09b9\u09bc\u09bc"
  419. +"\u09be\u09c4\u09c7\u09c8\u09cb\u09cd\u09d7\u09d7\u09dc\u09dd\u09df\u09e3\u09e6\u09f1"
  420. +"\u0a02\u0a02\u0a05\u0a0a\u0a0f\u0a10\u0a13\u0a28\u0a2a\u0a30\u0a32\u0a33\u0a35\u0a36"
  421. +"\u0a38\u0a39\u0a3c\u0a3c\u0a3e\u0a42\u0a47\u0a48\u0a4b\u0a4d\u0a59\u0a5c\u0a5e\u0a5e"
  422. +"\u0a66\u0a74\u0a81\u0a83\u0a85\u0a8b\u0a8d\u0a8d\u0a8f\u0a91\u0a93\u0aa8\u0aaa\u0ab0"
  423. +"\u0ab2\u0ab3\u0ab5\u0ab9\u0abc\u0ac5\u0ac7\u0ac9\u0acb\u0acd\u0ae0\u0ae0\u0ae6\u0aef"
  424. +"\u0b01\u0b03\u0b05\u0b0c\u0b0f\u0b10\u0b13\u0b28\u0b2a\u0b30\u0b32\u0b33\u0b36\u0b39"
  425. +"\u0b3c\u0b43\u0b47\u0b48\u0b4b\u0b4d\u0b56\u0b57\u0b5c\u0b5d\u0b5f\u0b61\u0b66\u0b6f"
  426. +"\u0b82\u0b83\u0b85\u0b8a\u0b8e\u0b90\u0b92\u0b95\u0b99\u0b9a\u0b9c\u0b9c\u0b9e\u0b9f"
  427. +"\u0ba3\u0ba4\u0ba8\u0baa\u0bae\u0bb5\u0bb7\u0bb9\u0bbe\u0bc2\u0bc6\u0bc8\u0bca\u0bcd"
  428. +"\u0bd7\u0bd7\u0be7\u0bef\u0c01\u0c03\u0c05\u0c0c\u0c0e\u0c10\u0c12\u0c28\u0c2a\u0c33"
  429. +"\u0c35\u0c39\u0c3e\u0c44\u0c46\u0c48\u0c4a\u0c4d\u0c55\u0c56\u0c60\u0c61\u0c66\u0c6f"
  430. +"\u0c82\u0c83\u0c85\u0c8c\u0c8e\u0c90\u0c92\u0ca8\u0caa\u0cb3\u0cb5\u0cb9\u0cbe\u0cc4"
  431. +"\u0cc6\u0cc8\u0cca\u0ccd\u0cd5\u0cd6\u0cde\u0cde\u0ce0\u0ce1\u0ce6\u0cef\u0d02\u0d03"
  432. +"\u0d05\u0d0c\u0d0e\u0d10\u0d12\u0d28\u0d2a\u0d39\u0d3e\u0d43\u0d46\u0d48\u0d4a\u0d4d"
  433. +"\u0d57\u0d57\u0d60\u0d61\u0d66\u0d6f\u0e01\u0e2e\u0e30\u0e3a\u0e40\u0e4e\u0e50\u0e59"
  434. +"\u0e81\u0e82\u0e84\u0e84\u0e87\u0e88\u0e8a\u0e8a\u0e8d\u0e8d\u0e94\u0e97\u0e99\u0e9f"
  435. +"\u0ea1\u0ea3\u0ea5\u0ea5\u0ea7\u0ea7\u0eaa\u0eab\u0ead\u0eae\u0eb0\u0eb9\u0ebb\u0ebd"
  436. +"\u0ec0\u0ec4\u0ec6\u0ec6\u0ec8\u0ecd\u0ed0\u0ed9\u0f18\u0f19\u0f20\u0f29\u0f35\u0f35"
  437. +"\u0f37\u0f37\u0f39\u0f39\u0f3e\u0f47\u0f49\u0f69\u0f71\u0f84\u0f86\u0f8b\u0f90\u0f95"
  438. +"\u0f97\u0f97\u0f99\u0fad\u0fb1\u0fb7\u0fb9\u0fb9\u10a0\u10c5\u10d0\u10f6\u1100\u1100"
  439. +"\u1102\u1103\u1105\u1107\u1109\u1109\u110b\u110c\u110e\u1112\u113c\u113c\u113e\u113e"
  440. +"\u1140\u1140\u114c\u114c\u114e\u114e\u1150\u1150\u1154\u1155\u1159\u1159\u115f\u1161"
  441. +"\u1163\u1163\u1165\u1165\u1167\u1167\u1169\u1169\u116d\u116e\u1172\u1173\u1175\u1175"
  442. +"\u119e\u119e\u11a8\u11a8\u11ab\u11ab\u11ae\u11af\u11b7\u11b8\u11ba\u11ba\u11bc\u11c2"
  443. +"\u11eb\u11eb\u11f0\u11f0\u11f9\u11f9\u1e00\u1e9b\u1ea0\u1ef9\u1f00\u1f15\u1f18\u1f1d"
  444. +"\u1f20\u1f45\u1f48\u1f4d\u1f50\u1f57\u1f59\u1f59\u1f5b\u1f5b\u1f5d\u1f5d\u1f5f\u1f7d"
  445. +"\u1f80\u1fb4\u1fb6\u1fbc\u1fbe\u1fbe\u1fc2\u1fc4\u1fc6\u1fcc\u1fd0\u1fd3\u1fd6\u1fdb"
  446. +"\u1fe0\u1fec\u1ff2\u1ff4\u1ff6\u1ffc\u20d0\u20dc\u20e1\u20e1\u2126\u2126\u212a\u212b"
  447. +"\u212e\u212e\u2180\u2182\u3005\u3005\u3007\u3007\u3021\u302f\u3031\u3035\u3041\u3094"
  448. +"\u3099\u309a\u309d\u309e\u30a1\u30fa\u30fc\u30fe\u3105\u312c\u4e00\u9fa5\uac00\ud7a3"
  449. +"";
  450. private static final String LETTERS =
  451. "\u0041\u005a\u0061\u007a\u00c0\u00d6\u00d8\u00f6\u00f8\u0131\u0134\u013e\u0141\u0148"
  452. +"\u014a\u017e\u0180\u01c3\u01cd\u01f0\u01f4\u01f5\u01fa\u0217\u0250\u02a8\u02bb\u02c1"
  453. +"\u0386\u0386\u0388\u038a\u038c\u038c\u038e\u03a1\u03a3\u03ce\u03d0\u03d6\u03da\u03da"
  454. +"\u03dc\u03dc\u03de\u03de\u03e0\u03e0\u03e2\u03f3\u0401\u040c\u040e\u044f\u0451\u045c"
  455. +"\u045e\u0481\u0490\u04c4\u04c7\u04c8\u04cb\u04cc\u04d0\u04eb\u04ee\u04f5\u04f8\u04f9"
  456. +"\u0531\u0556\u0559\u0559\u0561\u0586\u05d0\u05ea\u05f0\u05f2\u0621\u063a\u0641\u064a"
  457. +"\u0671\u06b7\u06ba\u06be\u06c0\u06ce\u06d0\u06d3\u06d5\u06d5\u06e5\u06e6\u0905\u0939"
  458. +"\u093d\u093d\u0958\u0961\u0985\u098c\u098f\u0990\u0993\u09a8\u09aa\u09b0\u09b2\u09b2"
  459. +"\u09b6\u09b9\u09dc\u09dd\u09df\u09e1\u09f0\u09f1\u0a05\u0a0a\u0a0f\u0a10\u0a13\u0a28"
  460. +"\u0a2a\u0a30\u0a32\u0a33\u0a35\u0a36\u0a38\u0a39\u0a59\u0a5c\u0a5e\u0a5e\u0a72\u0a74"
  461. +"\u0a85\u0a8b\u0a8d\u0a8d\u0a8f\u0a91\u0a93\u0aa8\u0aaa\u0ab0\u0ab2\u0ab3\u0ab5\u0ab9"
  462. +"\u0abd\u0abd\u0ae0\u0ae0\u0b05\u0b0c\u0b0f\u0b10\u0b13\u0b28\u0b2a\u0b30\u0b32\u0b33"
  463. +"\u0b36\u0b39\u0b3d\u0b3d\u0b5c\u0b5d\u0b5f\u0b61\u0b85\u0b8a\u0b8e\u0b90\u0b92\u0b95"
  464. +"\u0b99\u0b9a\u0b9c\u0b9c\u0b9e\u0b9f\u0ba3\u0ba4\u0ba8\u0baa\u0bae\u0bb5\u0bb7\u0bb9"
  465. +"\u0c05\u0c0c\u0c0e\u0c10\u0c12\u0c28\u0c2a\u0c33\u0c35\u0c39\u0c60\u0c61\u0c85\u0c8c"
  466. +"\u0c8e\u0c90\u0c92\u0ca8\u0caa\u0cb3\u0cb5\u0cb9\u0cde\u0cde\u0ce0\u0ce1\u0d05\u0d0c"
  467. +"\u0d0e\u0d10\u0d12\u0d28\u0d2a\u0d39\u0d60\u0d61\u0e01\u0e2e\u0e30\u0e30\u0e32\u0e33"
  468. +"\u0e40\u0e45\u0e81\u0e82\u0e84\u0e84\u0e87\u0e88\u0e8a\u0e8a\u0e8d\u0e8d\u0e94\u0e97"
  469. +"\u0e99\u0e9f\u0ea1\u0ea3\u0ea5\u0ea5\u0ea7\u0ea7\u0eaa\u0eab\u0ead\u0eae\u0eb0\u0eb0"
  470. +"\u0eb2\u0eb3\u0ebd\u0ebd\u0ec0\u0ec4\u0f40\u0f47\u0f49\u0f69\u10a0\u10c5\u10d0\u10f6"
  471. +"\u1100\u1100\u1102\u1103\u1105\u1107\u1109\u1109\u110b\u110c\u110e\u1112\u113c\u113c"
  472. +"\u113e\u113e\u1140\u1140\u114c\u114c\u114e\u114e\u1150\u1150\u1154\u1155\u1159\u1159"
  473. +"\u115f\u1161\u1163\u1163\u1165\u1165\u1167\u1167\u1169\u1169\u116d\u116e\u1172\u1173"
  474. +"\u1175\u1175\u119e\u119e\u11a8\u11a8\u11ab\u11ab\u11ae\u11af\u11b7\u11b8\u11ba\u11ba"
  475. +"\u11bc\u11c2\u11eb\u11eb\u11f0\u11f0\u11f9\u11f9\u1e00\u1e9b\u1ea0\u1ef9\u1f00\u1f15"
  476. +"\u1f18\u1f1d\u1f20\u1f45\u1f48\u1f4d\u1f50\u1f57\u1f59\u1f59\u1f5b\u1f5b\u1f5d\u1f5d"
  477. +"\u1f5f\u1f7d\u1f80\u1fb4\u1fb6\u1fbc\u1fbe\u1fbe\u1fc2\u1fc4\u1fc6\u1fcc\u1fd0\u1fd3"
  478. +"\u1fd6\u1fdb\u1fe0\u1fec\u1ff2\u1ff4\u1ff6\u1ffc\u2126\u2126\u212a\u212b\u212e\u212e"
  479. +"\u2180\u2182\u3007\u3007\u3021\u3029\u3041\u3094\u30a1\u30fa\u3105\u312c\u4e00\u9fa5"
  480. +"\uac00\ud7a3";
  481. private static final String DIGITS =
  482. "\u0030\u0039\u0660\u0669\u06F0\u06F9\u0966\u096F\u09E6\u09EF\u0A66\u0A6F\u0AE6\u0AEF"
  483. +"\u0B66\u0B6F\u0BE7\u0BEF\u0C66\u0C6F\u0CE6\u0CEF\u0D66\u0D6F\u0E50\u0E59\u0ED0\u0ED9"
  484. +"\u0F20\u0F29";
  485. }