1. /*
  2. * The Apache Software License, Version 1.1
  3. *
  4. *
  5. * Copyright (c) 1999-2002 The Apache Software Foundation. All rights
  6. * reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. *
  12. * 1. Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. *
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in
  17. * the documentation and/or other materials provided with the
  18. * distribution.
  19. *
  20. * 3. The end-user documentation included with the redistribution,
  21. * if any, must include the following acknowledgment:
  22. * "This product includes software developed by the
  23. * Apache Software Foundation (http://www.apache.org/)."
  24. * Alternately, this acknowledgment may appear in the software itself,
  25. * if and wherever such third-party acknowledgments normally appear.
  26. *
  27. * 4. The names "Xerces" and "Apache Software Foundation" must
  28. * not be used to endorse or promote products derived from this
  29. * software without prior written permission. For written
  30. * permission, please contact apache@apache.org.
  31. *
  32. * 5. Products derived from this software may not be called "Apache",
  33. * nor may "Apache" appear in their name, without prior written
  34. * permission of the Apache Software Foundation.
  35. *
  36. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  37. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  38. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  39. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  40. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  41. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  42. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  43. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  44. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  45. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  46. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  47. * SUCH DAMAGE.
  48. * ====================================================================
  49. *
  50. * This software consists of voluntary contributions made by many
  51. * individuals on behalf of the Apache Software Foundation and was
  52. * originally based on software copyright (c) 1999, International
  53. * Business Machines, Inc., http://www.apache.org. For more
  54. * information on the Apache Software Foundation, please see
  55. * <http://www.apache.org/>.
  56. */
  57. package org.apache.xml.utils;
  /**
   * This class defines the basic XML character properties. The data
   * in this class can be used to verify that a character is a valid
   * XML character or if the character is a space, name start, or name
   * character.
   * <p>
   * A series of convenience methods are supplied to ease the burden
   * of the developer. Because inlining the checks can improve per
   * character performance, the tables of character properties are
   * public. For a value in the range 0x0000-0xFFFF, using the character
   * as an index into the <code>CHARS</code> array and applying the
   * appropriate mask flag (e.g. <code>MASK_VALID</code>) yields the
   * same results as calling the convenience methods. Values outside
   * that range cannot be used as an index: callers that apply the
   * masks directly are responsible for range checking and for the
   * supplemental range 0x10000-0x10FFFF (see <code>isValid</code> and
   * <code>isContent</code>).
   *
   * @author Glenn Marcy, IBM
   * @author Andy Clark, IBM
   * @author Eric Ye, IBM
   * @author Arnaud Le Hors, IBM
   * @author Rahul Srivastava, Sun Microsystems Inc.
   *
   * @version $Id: XMLChar.java,v 1.1 2002/07/10 17:09:23 mmidy Exp $
   */
  public class XMLChar {

      //
      // Constants
      //

      /** Character flags, one byte of mask bits per UTF-16 code unit. */
      public static final byte[] CHARS = new byte[1 << 16];

      /** Valid character mask. */
      public static final int MASK_VALID = 0x01;

      /** Space character mask. */
      public static final int MASK_SPACE = 0x02;

      /** Name start character mask. */
      public static final int MASK_NAME_START = 0x04;

      /** Name character mask. */
      public static final int MASK_NAME = 0x08;

      /** Pubid character mask. */
      public static final int MASK_PUBID = 0x10;

      /**
       * Content character mask. Special characters are those that can
       * be considered the start of markup, such as '&lt;' and '&amp;'.
       * The various newline characters are considered special as well.
       * All other valid XML characters can be considered content.
       * <p>
       * This is an optimization for the inner loop of character scanning.
       */
      public static final int MASK_CONTENT = 0x20;

      /** NCName start character mask. */
      public static final int MASK_NCNAME_START = 0x40;

      /** NCName character mask. */
      public static final int MASK_NCNAME = 0x80;

      //
      // Static initialization
      //

      static {

          //
          // [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] |
          //              [#xE000-#xFFFD] | [#x10000-#x10FFFF]
          //

          int[] charRange = {
              0x0009, 0x000A, 0x000D, 0x000D, 0x0020, 0xD7FF, 0xE000, 0xFFFD,
          };

          //
          // [3] S ::= (#x20 | #x9 | #xD | #xA)+
          //

          int[] spaceChar = {
              0x0020, 0x0009, 0x000D, 0x000A,
          };

          //
          // [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
          //                  CombiningChar | Extender
          //

          int[] nameChar = {
              0x002D, 0x002E, // '-' and '.'
          };

          //
          // [5] Name ::= (Letter | '_' | ':') (NameChar)*
          //

          int[] nameStartChar = {
              0x003A, 0x005F, // ':' and '_'
          };

          //
          // [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
          //

          int[] pubidChar = {
              0x000A, 0x000D, 0x0020, 0x0021, 0x0023, 0x0024, 0x0025, 0x003D,
              0x005F
          };

          int[] pubidRange = {
              0x0027, 0x003B, 0x003F, 0x005A, 0x0061, 0x007A
          };

          //
          // [84] Letter ::= BaseChar | Ideographic
          //

          int[] letterRange = {
              // BaseChar
              0x0041, 0x005A, 0x0061, 0x007A, 0x00C0, 0x00D6, 0x00D8, 0x00F6,
              0x00F8, 0x0131, 0x0134, 0x013E, 0x0141, 0x0148, 0x014A, 0x017E,
              0x0180, 0x01C3, 0x01CD, 0x01F0, 0x01F4, 0x01F5, 0x01FA, 0x0217,
              0x0250, 0x02A8, 0x02BB, 0x02C1, 0x0388, 0x038A, 0x038E, 0x03A1,
              0x03A3, 0x03CE, 0x03D0, 0x03D6, 0x03E2, 0x03F3, 0x0401, 0x040C,
              0x040E, 0x044F, 0x0451, 0x045C, 0x045E, 0x0481, 0x0490, 0x04C4,
              0x04C7, 0x04C8, 0x04CB, 0x04CC, 0x04D0, 0x04EB, 0x04EE, 0x04F5,
              0x04F8, 0x04F9, 0x0531, 0x0556, 0x0561, 0x0586, 0x05D0, 0x05EA,
              0x05F0, 0x05F2, 0x0621, 0x063A, 0x0641, 0x064A, 0x0671, 0x06B7,
              0x06BA, 0x06BE, 0x06C0, 0x06CE, 0x06D0, 0x06D3, 0x06E5, 0x06E6,
              0x0905, 0x0939, 0x0958, 0x0961, 0x0985, 0x098C, 0x098F, 0x0990,
              0x0993, 0x09A8, 0x09AA, 0x09B0, 0x09B6, 0x09B9, 0x09DC, 0x09DD,
              0x09DF, 0x09E1, 0x09F0, 0x09F1, 0x0A05, 0x0A0A, 0x0A0F, 0x0A10,
              0x0A13, 0x0A28, 0x0A2A, 0x0A30, 0x0A32, 0x0A33, 0x0A35, 0x0A36,
              0x0A38, 0x0A39, 0x0A59, 0x0A5C, 0x0A72, 0x0A74, 0x0A85, 0x0A8B,
              0x0A8F, 0x0A91, 0x0A93, 0x0AA8, 0x0AAA, 0x0AB0, 0x0AB2, 0x0AB3,
              0x0AB5, 0x0AB9, 0x0B05, 0x0B0C, 0x0B0F, 0x0B10, 0x0B13, 0x0B28,
              0x0B2A, 0x0B30, 0x0B32, 0x0B33, 0x0B36, 0x0B39, 0x0B5C, 0x0B5D,
              0x0B5F, 0x0B61, 0x0B85, 0x0B8A, 0x0B8E, 0x0B90, 0x0B92, 0x0B95,
              0x0B99, 0x0B9A, 0x0B9E, 0x0B9F, 0x0BA3, 0x0BA4, 0x0BA8, 0x0BAA,
              0x0BAE, 0x0BB5, 0x0BB7, 0x0BB9, 0x0C05, 0x0C0C, 0x0C0E, 0x0C10,
              0x0C12, 0x0C28, 0x0C2A, 0x0C33, 0x0C35, 0x0C39, 0x0C60, 0x0C61,
              0x0C85, 0x0C8C, 0x0C8E, 0x0C90, 0x0C92, 0x0CA8, 0x0CAA, 0x0CB3,
              0x0CB5, 0x0CB9, 0x0CE0, 0x0CE1, 0x0D05, 0x0D0C, 0x0D0E, 0x0D10,
              0x0D12, 0x0D28, 0x0D2A, 0x0D39, 0x0D60, 0x0D61, 0x0E01, 0x0E2E,
              0x0E32, 0x0E33, 0x0E40, 0x0E45, 0x0E81, 0x0E82, 0x0E87, 0x0E88,
              0x0E94, 0x0E97, 0x0E99, 0x0E9F, 0x0EA1, 0x0EA3, 0x0EAA, 0x0EAB,
              0x0EAD, 0x0EAE, 0x0EB2, 0x0EB3, 0x0EC0, 0x0EC4, 0x0F40, 0x0F47,
              0x0F49, 0x0F69, 0x10A0, 0x10C5, 0x10D0, 0x10F6, 0x1102, 0x1103,
              0x1105, 0x1107, 0x110B, 0x110C, 0x110E, 0x1112, 0x1154, 0x1155,
              0x115F, 0x1161, 0x116D, 0x116E, 0x1172, 0x1173, 0x11AE, 0x11AF,
              0x11B7, 0x11B8, 0x11BC, 0x11C2, 0x1E00, 0x1E9B, 0x1EA0, 0x1EF9,
              0x1F00, 0x1F15, 0x1F18, 0x1F1D, 0x1F20, 0x1F45, 0x1F48, 0x1F4D,
              0x1F50, 0x1F57, 0x1F5F, 0x1F7D, 0x1F80, 0x1FB4, 0x1FB6, 0x1FBC,
              0x1FC2, 0x1FC4, 0x1FC6, 0x1FCC, 0x1FD0, 0x1FD3, 0x1FD6, 0x1FDB,
              0x1FE0, 0x1FEC, 0x1FF2, 0x1FF4, 0x1FF6, 0x1FFC, 0x212A, 0x212B,
              0x2180, 0x2182, 0x3041, 0x3094, 0x30A1, 0x30FA, 0x3105, 0x312C,
              0xAC00, 0xD7A3,
              // Ideographic
              0x3021, 0x3029, 0x4E00, 0x9FA5,
          };

          int[] letterChar = {
              // BaseChar
              0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0559, 0x06D5,
              0x093D, 0x09B2, 0x0A5E, 0x0A8D, 0x0ABD, 0x0AE0, 0x0B3D, 0x0B9C,
              0x0CDE, 0x0E30, 0x0E84, 0x0E8A, 0x0E8D, 0x0EA5, 0x0EA7, 0x0EB0,
              0x0EBD, 0x1100, 0x1109, 0x113C, 0x113E, 0x1140, 0x114C, 0x114E,
              0x1150, 0x1159, 0x1163, 0x1165, 0x1167, 0x1169, 0x1175, 0x119E,
              0x11A8, 0x11AB, 0x11BA, 0x11EB, 0x11F0, 0x11F9, 0x1F59, 0x1F5B,
              0x1F5D, 0x1FBE, 0x2126, 0x212E,
              // Ideographic
              0x3007,
          };

          //
          // [87] CombiningChar ::= ...
          //

          int[] combiningCharRange = {
              0x0300, 0x0345, 0x0360, 0x0361, 0x0483, 0x0486, 0x0591, 0x05A1,
              0x05A3, 0x05B9, 0x05BB, 0x05BD, 0x05C1, 0x05C2, 0x064B, 0x0652,
              0x06D6, 0x06DC, 0x06DD, 0x06DF, 0x06E0, 0x06E4, 0x06E7, 0x06E8,
              0x06EA, 0x06ED, 0x0901, 0x0903, 0x093E, 0x094C, 0x0951, 0x0954,
              0x0962, 0x0963, 0x0981, 0x0983, 0x09C0, 0x09C4, 0x09C7, 0x09C8,
              0x09CB, 0x09CD, 0x09E2, 0x09E3, 0x0A40, 0x0A42, 0x0A47, 0x0A48,
              0x0A4B, 0x0A4D, 0x0A70, 0x0A71, 0x0A81, 0x0A83, 0x0ABE, 0x0AC5,
              0x0AC7, 0x0AC9, 0x0ACB, 0x0ACD, 0x0B01, 0x0B03, 0x0B3E, 0x0B43,
              0x0B47, 0x0B48, 0x0B4B, 0x0B4D, 0x0B56, 0x0B57, 0x0B82, 0x0B83,
              0x0BBE, 0x0BC2, 0x0BC6, 0x0BC8, 0x0BCA, 0x0BCD, 0x0C01, 0x0C03,
              0x0C3E, 0x0C44, 0x0C46, 0x0C48, 0x0C4A, 0x0C4D, 0x0C55, 0x0C56,
              0x0C82, 0x0C83, 0x0CBE, 0x0CC4, 0x0CC6, 0x0CC8, 0x0CCA, 0x0CCD,
              0x0CD5, 0x0CD6, 0x0D02, 0x0D03, 0x0D3E, 0x0D43, 0x0D46, 0x0D48,
              0x0D4A, 0x0D4D, 0x0E34, 0x0E3A, 0x0E47, 0x0E4E, 0x0EB4, 0x0EB9,
              0x0EBB, 0x0EBC, 0x0EC8, 0x0ECD, 0x0F18, 0x0F19, 0x0F71, 0x0F84,
              0x0F86, 0x0F8B, 0x0F90, 0x0F95, 0x0F99, 0x0FAD, 0x0FB1, 0x0FB7,
              0x20D0, 0x20DC, 0x302A, 0x302F,
          };

          int[] combiningCharChar = {
              0x05BF, 0x05C4, 0x0670, 0x093C, 0x094D, 0x09BC, 0x09BE, 0x09BF,
              0x09D7, 0x0A02, 0x0A3C, 0x0A3E, 0x0A3F, 0x0ABC, 0x0B3C, 0x0BD7,
              0x0D57, 0x0E31, 0x0EB1, 0x0F35, 0x0F37, 0x0F39, 0x0F3E, 0x0F3F,
              0x0F97, 0x0FB9, 0x20E1, 0x3099, 0x309A,
          };

          //
          // [88] Digit ::= ...
          //

          int[] digitRange = {
              0x0030, 0x0039, 0x0660, 0x0669, 0x06F0, 0x06F9, 0x0966, 0x096F,
              0x09E6, 0x09EF, 0x0A66, 0x0A6F, 0x0AE6, 0x0AEF, 0x0B66, 0x0B6F,
              0x0BE7, 0x0BEF, 0x0C66, 0x0C6F, 0x0CE6, 0x0CEF, 0x0D66, 0x0D6F,
              0x0E50, 0x0E59, 0x0ED0, 0x0ED9, 0x0F20, 0x0F29,
          };

          //
          // [89] Extender ::= ...
          //

          int[] extenderRange = {
              0x3031, 0x3035, 0x309D, 0x309E, 0x30FC, 0x30FE,
          };

          int[] extenderChar = {
              0x00B7, 0x02D0, 0x02D1, 0x0387, 0x0640, 0x0E46, 0x0EC6, 0x3005,
          };

          //
          // SpecialChar ::= '<', '&', '\n', '\r', ']'
          //

          int[] specialChar = {
              '<', '&', '\n', '\r', ']',
          };

          //
          // Initialize
          //
          // The order below matters: flags are first set broadly and then
          // selectively cleared (special characters, ':').
          //

          final int nameStartFlags =
              MASK_NAME_START | MASK_NAME | MASK_NCNAME_START | MASK_NCNAME;
          final int nameFlags = MASK_NAME | MASK_NCNAME;

          // set valid characters; every valid character starts out as content
          setRangeFlags(charRange, MASK_VALID | MASK_CONTENT);

          // remove special characters from the content set
          for (int i = 0; i < specialChar.length; i++) {
              CHARS[specialChar[i]] &= ~MASK_CONTENT;
          }

          // set space characters
          setCharFlags(spaceChar, MASK_SPACE);

          // set name start characters (a name start character is also a name character)
          setCharFlags(nameStartChar, nameStartFlags);
          setRangeFlags(letterRange, nameStartFlags);
          setCharFlags(letterChar, nameStartFlags);

          // set name characters
          setCharFlags(nameChar, nameFlags);
          setRangeFlags(digitRange, nameFlags);
          setRangeFlags(combiningCharRange, nameFlags);
          setCharFlags(combiningCharChar, nameFlags);
          setRangeFlags(extenderRange, nameFlags);
          setCharFlags(extenderChar, nameFlags);

          // remove ':' from allowable MASK_NCNAME_START and MASK_NCNAME chars
          CHARS[':'] &= ~(MASK_NCNAME_START | MASK_NCNAME);

          // set Pubid characters
          setCharFlags(pubidChar, MASK_PUBID);
          setRangeFlags(pubidRange, MASK_PUBID);

      } // <clinit>()

      //
      // Private static helpers
      //

      /** ORs <code>mask</code> into the <code>CHARS</code> entry of each listed character. */
      private static void setCharFlags(int[] chars, int mask) {
          for (int i = 0; i < chars.length; i++) {
              CHARS[chars[i]] |= mask;
          }
      }

      /**
       * ORs <code>mask</code> into the <code>CHARS</code> entry of every character
       * covered by <code>ranges</code>, which holds consecutive pairs of
       * inclusive (low, high) bounds.
       */
      private static void setRangeFlags(int[] ranges, int mask) {
          for (int i = 0; i < ranges.length; i += 2) {
              for (int j = ranges[i]; j <= ranges[i + 1]; j++) {
                  CHARS[j] |= mask;
              }
          }
      }

      /**
       * Returns true if <code>c</code> lies in 0x0000-0xFFFF and its
       * <code>CHARS</code> entry has any bit of <code>mask</code> set.
       * The lower bound check keeps negative values (for example an
       * end-of-input marker of -1) from indexing outside the table.
       */
      private static boolean hasFlag(int c, int mask) {
          return c >= 0 && c < 0x10000 && (CHARS[c] & mask) != 0;
      }

      /**
       * Returns true if <code>c</code> is one of the characters accepted
       * inside an encoding name by this class: an ASCII letter, an ASCII
       * digit, '.', '_' or '-'.
       */
      private static boolean isEncodingNameChar(char c) {
          return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
              || (c >= '0' && c <= '9') || c == '.' || c == '_' || c == '-';
      }

      //
      // Public static methods
      //

      /**
       * Returns true if the specified character is a supplemental character.
       *
       * @param c The character to check.
       * @return true if <code>c</code> is in the range 0x10000-0x10FFFF.
       */
      public static boolean isSupplemental(int c) {
          return (c >= 0x10000 && c <= 0x10FFFF);
      }

      /**
       * Returns the supplemental character corresponding to the given
       * surrogates. The arguments are not checked to be surrogates.
       *
       * @param h The high surrogate.
       * @param l The low surrogate.
       * @return the combined supplemental character.
       */
      public static int supplemental(char h, char l) {
          return (h - 0xD800) * 0x400 + (l - 0xDC00) + 0x10000;
      }

      /**
       * Returns the high surrogate of a supplemental character.
       *
       * @param c The supplemental character to "split".
       * @return the high surrogate of <code>c</code>.
       */
      public static char highSurrogate(int c) {
          return (char) (((c - 0x00010000) >> 10) + 0xD800);
      }

      /**
       * Returns the low surrogate of a supplemental character.
       *
       * @param c The supplemental character to "split".
       * @return the low surrogate of <code>c</code>.
       */
      public static char lowSurrogate(int c) {
          return (char) (((c - 0x00010000) & 0x3FF) + 0xDC00);
      }

      /**
       * Returns whether the given character is a high surrogate.
       *
       * @param c The character to check.
       * @return true if <code>c</code> is in the range 0xD800-0xDBFF.
       */
      public static boolean isHighSurrogate(int c) {
          return (0xD800 <= c && c <= 0xDBFF);
      }

      /**
       * Returns whether the given character is a low surrogate.
       *
       * @param c The character to check.
       * @return true if <code>c</code> is in the range 0xDC00-0xDFFF.
       */
      public static boolean isLowSurrogate(int c) {
          return (0xDC00 <= c && c <= 0xDFFF);
      }

      /**
       * Returns true if the specified character is valid. This method
       * also checks the surrogate character range from 0x10000 to 0x10FFFF.
       * <p>
       * If the program chooses to apply the mask directly to the
       * <code>CHARS</code> array, then they are responsible for checking
       * the surrogate character range.
       * <p>
       * Negative values and values above 0x10FFFF are reported as not valid.
       *
       * @param c The character to check.
       * @return true if <code>c</code> is a valid XML character.
       */
      public static boolean isValid(int c) {
          return hasFlag(c, MASK_VALID) || (0x10000 <= c && c <= 0x10FFFF);
      } // isValid(int):boolean

      /**
       * Returns true if the specified character is invalid.
       *
       * @param c The character to check.
       * @return the negation of <code>isValid(c)</code>.
       */
      public static boolean isInvalid(int c) {
          return !isValid(c);
      } // isInvalid(int):boolean

      /**
       * Returns true if the specified character can be considered content.
       * Every character in the range 0x10000-0x10FFFF is considered content.
       *
       * @param c The character to check.
       * @return true if <code>c</code> is a content character.
       */
      public static boolean isContent(int c) {
          return hasFlag(c, MASK_CONTENT) || (0x10000 <= c && c <= 0x10FFFF);
      } // isContent(int):boolean

      /**
       * Returns true if the specified character can be considered markup.
       * Markup characters include '&lt;', '&amp;', and '%'.
       *
       * @param c The character to check.
       * @return true if <code>c</code> is one of the markup characters.
       */
      public static boolean isMarkup(int c) {
          return c == '<' || c == '&' || c == '%';
      } // isMarkup(int):boolean

      /**
       * Returns true if the specified character is a space character
       * as defined by production [3] in the XML 1.0 specification.
       *
       * @param c The character to check.
       * @return true if <code>c</code> is a space character.
       */
      public static boolean isSpace(int c) {
          return hasFlag(c, MASK_SPACE);
      } // isSpace(int):boolean

      /**
       * Returns true if the specified character is a valid name start
       * character as defined by production [5] in the XML 1.0
       * specification.
       *
       * @param c The character to check.
       * @return true if <code>c</code> may start a Name.
       */
      public static boolean isNameStart(int c) {
          return hasFlag(c, MASK_NAME_START);
      } // isNameStart(int):boolean

      /**
       * Returns true if the specified character is a valid name
       * character as defined by production [4] in the XML 1.0
       * specification.
       *
       * @param c The character to check.
       * @return true if <code>c</code> may appear in a Name.
       */
      public static boolean isName(int c) {
          return hasFlag(c, MASK_NAME);
      } // isName(int):boolean

      /**
       * Returns true if the specified character is a valid NCName start
       * character as defined by production [4] in Namespaces in XML
       * recommendation.
       *
       * @param c The character to check.
       * @return true if <code>c</code> may start an NCName.
       */
      public static boolean isNCNameStart(int c) {
          return hasFlag(c, MASK_NCNAME_START);
      } // isNCNameStart(int):boolean

      /**
       * Returns true if the specified character is a valid NCName
       * character as defined by production [5] in Namespaces in XML
       * recommendation.
       *
       * @param c The character to check.
       * @return true if <code>c</code> may appear in an NCName.
       */
      public static boolean isNCName(int c) {
          return hasFlag(c, MASK_NCNAME);
      } // isNCName(int):boolean

      /**
       * Returns true if the specified character is a valid Pubid
       * character as defined by production [13] in the XML 1.0
       * specification.
       *
       * @param c The character to check.
       * @return true if <code>c</code> is a Pubid character.
       */
      public static boolean isPubid(int c) {
          return hasFlag(c, MASK_PUBID);
      } // isPubid(int):boolean

      /*
       * [5] Name ::= (Letter | '_' | ':') (NameChar)*
       */
      /**
       * Check to see if a string is a valid Name according to [5]
       * in the XML 1.0 Recommendation.
       *
       * @param name string to check
       * @return true if name is a valid Name; false if it is null, empty,
       *         or contains a character not allowed at its position
       */
      public static boolean isValidName(String name) {
          if (name == null || name.length() == 0) {
              return false;
          }
          if (!isNameStart(name.charAt(0))) {
              return false;
          }
          for (int i = 1; i < name.length(); i++) {
              if (!isName(name.charAt(i))) {
                  return false;
              }
          }
          return true;
      } // isValidName(String):boolean

      /*
       * from the namespace rec
       * [4] NCName ::= (Letter | '_') (NCNameChar)*
       */
      /**
       * Check to see if a string is a valid NCName according to [4]
       * from the XML Namespaces 1.0 Recommendation.
       *
       * @param ncName string to check
       * @return true if ncName is a valid NCName; false if it is null, empty,
       *         or contains a character not allowed at its position
       */
      public static boolean isValidNCName(String ncName) {
          if (ncName == null || ncName.length() == 0) {
              return false;
          }
          if (!isNCNameStart(ncName.charAt(0))) {
              return false;
          }
          for (int i = 1; i < ncName.length(); i++) {
              if (!isNCName(ncName.charAt(i))) {
                  return false;
              }
          }
          return true;
      } // isValidNCName(String):boolean

      /*
       * [7] Nmtoken ::= (NameChar)+
       */
      /**
       * Check to see if a string is a valid Nmtoken according to [7]
       * in the XML 1.0 Recommendation.
       *
       * @param nmtoken string to check
       * @return true if nmtoken is a valid Nmtoken; false if it is null,
       *         empty, or contains a character that is not a name character
       */
      public static boolean isValidNmtoken(String nmtoken) {
          if (nmtoken == null || nmtoken.length() == 0) {
              return false;
          }
          for (int i = 0; i < nmtoken.length(); i++) {
              if (!isName(nmtoken.charAt(i))) {
                  return false;
              }
          }
          return true;
      } // isValidNmtoken(String):boolean

      // encodings

      /**
       * Returns true if the encoding name is a valid IANA encoding.
       * This method does not verify that there is a decoder available
       * for this encoding, only that the characters are valid for an
       * IANA encoding name: an ASCII letter followed by any number of
       * ASCII letters, ASCII digits, '.', '_' or '-'.
       *
       * @param ianaEncoding The IANA encoding name.
       * @return true if the name is well formed; false if it is null,
       *         empty, or contains a disallowed character
       */
      public static boolean isValidIANAEncoding(String ianaEncoding) {
          if (ianaEncoding == null || ianaEncoding.length() == 0) {
              return false;
          }
          char first = ianaEncoding.charAt(0);
          if (!((first >= 'A' && first <= 'Z') || (first >= 'a' && first <= 'z'))) {
              return false;
          }
          int length = ianaEncoding.length();
          for (int i = 1; i < length; i++) {
              if (!isEncodingNameChar(ianaEncoding.charAt(i))) {
                  return false;
              }
          }
          return true;
      } // isValidIANAEncoding(String):boolean

      /**
       * Returns true if the encoding name is a valid Java encoding.
       * This method does not verify that there is a decoder available
       * for this encoding, only that the characters are valid for a
       * Java encoding name: one or more ASCII letters, ASCII digits,
       * '.', '_' or '-'. Unlike an IANA name, the first character
       * need not be a letter, but it is checked like every other character.
       *
       * @param javaEncoding The Java encoding name.
       * @return true if the name is well formed; false if it is null,
       *         empty, or contains a disallowed character
       */
      public static boolean isValidJavaEncoding(String javaEncoding) {
          if (javaEncoding == null || javaEncoding.length() == 0) {
              return false;
          }
          int length = javaEncoding.length();
          // Start at index 0 so the first character is validated as well.
          for (int i = 0; i < length; i++) {
              if (!isEncodingNameChar(javaEncoding.charAt(i))) {
                  return false;
              }
          }
          return true;
      } // isValidJavaEncoding(String):boolean

  } // class XMLChar