1. /*
  2. * The Apache Software License, Version 1.1
  3. *
  4. *
  5. * Copyright (c) 1999-2004 The Apache Software Foundation. All rights
  6. * reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. *
  12. * 1. Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. *
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in
  17. * the documentation and/or other materials provided with the
  18. * distribution.
  19. *
  20. * 3. The end-user documentation included with the redistribution,
  21. * if any, must include the following acknowledgment:
  22. * "This product includes software developed by the
  23. * Apache Software Foundation (http://www.apache.org/)."
  24. * Alternately, this acknowledgment may appear in the software itself,
  25. * if and wherever such third-party acknowledgments normally appear.
  26. *
  27. * 4. The names "Xerces" and "Apache Software Foundation" must
  28. * not be used to endorse or promote products derived from this
  29. * software without prior written permission. For written
  30. * permission, please contact apache@apache.org.
  31. *
  32. * 5. Products derived from this software may not be called "Apache",
  33. * nor may "Apache" appear in their name, without prior written
  34. * permission of the Apache Software Foundation.
  35. *
  36. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  37. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  38. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  39. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  40. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  41. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  42. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  43. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  44. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  45. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  46. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  47. * SUCH DAMAGE.
  48. * ====================================================================
  49. *
  50. * This software consists of voluntary contributions made by many
  51. * individuals on behalf of the Apache Software Foundation and was
  52. * originally based on software copyright (c) 1999, International
  53. * Business Machines, Inc., http://www.apache.org. For more
  54. * information on the Apache Software Foundation, please see
  55. * <http://www.apache.org/>.
  56. */
  57. package com.sun.org.apache.xerces.internal.util;
  58. import java.util.Arrays;
  59. /**
  60. * This class defines the basic properties of characters in XML 1.1. The data
  61. * in this class can be used to verify that a character is a valid
  62. * XML 1.1 character or if the character is a space, name start, or name
  63. * character.
  64. * <p>
  65. * A series of convenience methods are supplied to ease the burden
  66. * of the developer. Using the character as an index into the <code>XML11CHARS</code>
  67. * array and applying the appropriate mask flag (e.g.
  68. * <code>MASK_VALID</code>), yields the same results as calling the
  69. * convenience methods. There is one exception: check the comments
  70. * for the <code>isValid</code> method for details.
  71. *
  72. * @author Glenn Marcy, IBM
  73. * @author Andy Clark, IBM
  74. * @author Arnaud Le Hors, IBM
  75. * @author Neil Graham, IBM
  76. * @author Michael Glavassevich, IBM
  77. *
  78. * @version $Id: XML11Char.java,v 1.6 2004/02/03 20:34:27 mrglavas Exp $
  79. */
  80. public class XML11Char {
  81. //
  82. // Constants
  83. //
  84. /** Character flags for XML 1.1. */
  85. private static final byte XML11CHARS [] = new byte [1 << 16];
  86. /** XML 1.1 Valid character mask. */
  87. public static final int MASK_XML11_VALID = 0x01;
  88. /** XML 1.1 Space character mask. */
  89. public static final int MASK_XML11_SPACE = 0x02;
  90. /** XML 1.1 Name start character mask. */
  91. public static final int MASK_XML11_NAME_START = 0x04;
  92. /** XML 1.1 Name character mask. */
  93. public static final int MASK_XML11_NAME = 0x08;
  94. /** XML 1.1 control character mask */
  95. public static final int MASK_XML11_CONTROL = 0x10;
  96. /** XML 1.1 content for external entities (valid - "special" chars - control chars) */
  97. public static final int MASK_XML11_CONTENT = 0x20;
  98. /** XML namespaces 1.1 NCNameStart */
  99. public static final int MASK_XML11_NCNAME_START = 0x40;
  100. /** XML namespaces 1.1 NCName */
  101. public static final int MASK_XML11_NCNAME = 0x80;
  102. /** XML 1.1 content for internal entities (valid - "special" chars) */
  103. public static final int MASK_XML11_CONTENT_INTERNAL = MASK_XML11_CONTROL | MASK_XML11_CONTENT;
  104. //
  105. // Static initialization
  106. //
  107. static {
  108. // Initializing the Character Flag Array
  109. // Code generated by: XML11CharGenerator.
  110. Arrays.fill(XML11CHARS, 1, 9, (byte) 17 ); // Fill 8 of value (byte) 17
  111. XML11CHARS[9] = 35;
  112. XML11CHARS[10] = 3;
  113. Arrays.fill(XML11CHARS, 11, 13, (byte) 17 ); // Fill 2 of value (byte) 17
  114. XML11CHARS[13] = 3;
  115. Arrays.fill(XML11CHARS, 14, 32, (byte) 17 ); // Fill 18 of value (byte) 17
  116. XML11CHARS[32] = 35;
  117. Arrays.fill(XML11CHARS, 33, 38, (byte) 33 ); // Fill 5 of value (byte) 33
  118. XML11CHARS[38] = 1;
  119. Arrays.fill(XML11CHARS, 39, 45, (byte) 33 ); // Fill 6 of value (byte) 33
  120. Arrays.fill(XML11CHARS, 45, 47, (byte) -87 ); // Fill 2 of value (byte) -87
  121. XML11CHARS[47] = 33;
  122. Arrays.fill(XML11CHARS, 48, 58, (byte) -87 ); // Fill 10 of value (byte) -87
  123. XML11CHARS[58] = 45;
  124. XML11CHARS[59] = 33;
  125. XML11CHARS[60] = 1;
  126. Arrays.fill(XML11CHARS, 61, 65, (byte) 33 ); // Fill 4 of value (byte) 33
  127. Arrays.fill(XML11CHARS, 65, 91, (byte) -19 ); // Fill 26 of value (byte) -19
  128. Arrays.fill(XML11CHARS, 91, 93, (byte) 33 ); // Fill 2 of value (byte) 33
  129. XML11CHARS[93] = 1;
  130. XML11CHARS[94] = 33;
  131. XML11CHARS[95] = -19;
  132. XML11CHARS[96] = 33;
  133. Arrays.fill(XML11CHARS, 97, 123, (byte) -19 ); // Fill 26 of value (byte) -19
  134. Arrays.fill(XML11CHARS, 123, 127, (byte) 33 ); // Fill 4 of value (byte) 33
  135. Arrays.fill(XML11CHARS, 127, 133, (byte) 17 ); // Fill 6 of value (byte) 17
  136. XML11CHARS[133] = 35;
  137. Arrays.fill(XML11CHARS, 134, 160, (byte) 17 ); // Fill 26 of value (byte) 17
  138. Arrays.fill(XML11CHARS, 160, 183, (byte) 33 ); // Fill 23 of value (byte) 33
  139. XML11CHARS[183] = -87;
  140. Arrays.fill(XML11CHARS, 184, 192, (byte) 33 ); // Fill 8 of value (byte) 33
  141. Arrays.fill(XML11CHARS, 192, 215, (byte) -19 ); // Fill 23 of value (byte) -19
  142. XML11CHARS[215] = 33;
  143. Arrays.fill(XML11CHARS, 216, 247, (byte) -19 ); // Fill 31 of value (byte) -19
  144. XML11CHARS[247] = 33;
  145. Arrays.fill(XML11CHARS, 248, 768, (byte) -19 ); // Fill 520 of value (byte) -19
  146. Arrays.fill(XML11CHARS, 768, 880, (byte) -87 ); // Fill 112 of value (byte) -87
  147. Arrays.fill(XML11CHARS, 880, 894, (byte) -19 ); // Fill 14 of value (byte) -19
  148. XML11CHARS[894] = 33;
  149. Arrays.fill(XML11CHARS, 895, 8192, (byte) -19 ); // Fill 7297 of value (byte) -19
  150. Arrays.fill(XML11CHARS, 8192, 8204, (byte) 33 ); // Fill 12 of value (byte) 33
  151. Arrays.fill(XML11CHARS, 8204, 8206, (byte) -19 ); // Fill 2 of value (byte) -19
  152. Arrays.fill(XML11CHARS, 8206, 8232, (byte) 33 ); // Fill 26 of value (byte) 33
  153. XML11CHARS[8232] = 35;
  154. Arrays.fill(XML11CHARS, 8233, 8255, (byte) 33 ); // Fill 22 of value (byte) 33
  155. Arrays.fill(XML11CHARS, 8255, 8257, (byte) -87 ); // Fill 2 of value (byte) -87
  156. Arrays.fill(XML11CHARS, 8257, 8304, (byte) 33 ); // Fill 47 of value (byte) 33
  157. Arrays.fill(XML11CHARS, 8304, 8592, (byte) -19 ); // Fill 288 of value (byte) -19
  158. Arrays.fill(XML11CHARS, 8592, 11264, (byte) 33 ); // Fill 2672 of value (byte) 33
  159. Arrays.fill(XML11CHARS, 11264, 12272, (byte) -19 ); // Fill 1008 of value (byte) -19
  160. Arrays.fill(XML11CHARS, 12272, 12289, (byte) 33 ); // Fill 17 of value (byte) 33
  161. Arrays.fill(XML11CHARS, 12289, 55296, (byte) -19 ); // Fill 43007 of value (byte) -19
  162. Arrays.fill(XML11CHARS, 57344, 63744, (byte) 33 ); // Fill 6400 of value (byte) 33
  163. Arrays.fill(XML11CHARS, 63744, 64976, (byte) -19 ); // Fill 1232 of value (byte) -19
  164. Arrays.fill(XML11CHARS, 64976, 65008, (byte) 33 ); // Fill 32 of value (byte) 33
  165. Arrays.fill(XML11CHARS, 65008, 65534, (byte) -19 ); // Fill 526 of value (byte) -19
  166. } // <clinit>()
  167. //
  168. // Public static methods
  169. //
  170. /**
  171. * Returns true if the specified character is a space character
  172. * as amdended in the XML 1.1 specification.
  173. *
  174. * @param c The character to check.
  175. */
  176. public static boolean isXML11Space(int c) {
  177. return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_SPACE) != 0);
  178. } // isXML11Space(int):boolean
  179. /**
  180. * Returns true if the specified character is valid. This method
  181. * also checks the surrogate character range from 0x10000 to 0x10FFFF.
  182. * <p>
  183. * If the program chooses to apply the mask directly to the
  184. * <code>XML11CHARS</code> array, then they are responsible for checking
  185. * the surrogate character range.
  186. *
  187. * @param c The character to check.
  188. */
  189. public static boolean isXML11Valid(int c) {
  190. return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_VALID) != 0)
  191. || (0x10000 <= c && c <= 0x10FFFF);
  192. } // isXML11Valid(int):boolean
  193. /**
  194. * Returns true if the specified character is invalid.
  195. *
  196. * @param c The character to check.
  197. */
  198. public static boolean isXML11Invalid(int c) {
  199. return !isXML11Valid(c);
  200. } // isXML11Invalid(int):boolean
  201. /**
  202. * Returns true if the specified character is valid and permitted outside
  203. * of a character reference.
  204. * That is, this method will return false for the same set as
  205. * isXML11Valid, except it also reports false for "control characters".
  206. *
  207. * @param c The character to check.
  208. */
  209. public static boolean isXML11ValidLiteral(int c) {
  210. return ((c < 0x10000 && ((XML11CHARS[c] & MASK_XML11_VALID) != 0 && (XML11CHARS[c] & MASK_XML11_CONTROL) == 0))
  211. || (0x10000 <= c && c <= 0x10FFFF));
  212. } // isXML11ValidLiteral(int):boolean
  213. /**
  214. * Returns true if the specified character can be considered
  215. * content in an external parsed entity.
  216. *
  217. * @param c The character to check.
  218. */
  219. public static boolean isXML11Content(int c) {
  220. return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_CONTENT) != 0) ||
  221. (0x10000 <= c && c <= 0x10FFFF);
  222. } // isXML11Content(int):boolean
  223. /**
  224. * Returns true if the specified character can be considered
  225. * content in an internal parsed entity.
  226. *
  227. * @param c The character to check.
  228. */
  229. public static boolean isXML11InternalEntityContent(int c) {
  230. return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_CONTENT_INTERNAL) != 0) ||
  231. (0x10000 <= c && c <= 0x10FFFF);
  232. } // isXML11InternalEntityContent(int):boolean
  233. /**
  234. * Returns true if the specified character is a valid name start
  235. * character as defined by production [4] in the XML 1.1
  236. * specification.
  237. *
  238. * @param c The character to check.
  239. */
  240. public static boolean isXML11NameStart(int c) {
  241. return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_NAME_START) != 0)
  242. || (0x10000 <= c && c < 0xF0000);
  243. } // isXML11NameStart(int):boolean
  244. /**
  245. * Returns true if the specified character is a valid name
  246. * character as defined by production [4a] in the XML 1.1
  247. * specification.
  248. *
  249. * @param c The character to check.
  250. */
  251. public static boolean isXML11Name(int c) {
  252. return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_NAME) != 0)
  253. || (c >= 0x10000 && c < 0xF0000);
  254. } // isXML11Name(int):boolean
  255. /**
  256. * Returns true if the specified character is a valid NCName start
  257. * character as defined by production [4] in Namespaces in XML
  258. * 1.1 recommendation.
  259. *
  260. * @param c The character to check.
  261. */
  262. public static boolean isXML11NCNameStart(int c) {
  263. return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_NCNAME_START) != 0)
  264. || (0x10000 <= c && c < 0xF0000);
  265. } // isXML11NCNameStart(int):boolean
  266. /**
  267. * Returns true if the specified character is a valid NCName
  268. * character as defined by production [5] in Namespaces in XML
  269. * 1.1 recommendation.
  270. *
  271. * @param c The character to check.
  272. */
  273. public static boolean isXML11NCName(int c) {
  274. return (c < 0x10000 && (XML11CHARS[c] & MASK_XML11_NCNAME) != 0)
  275. || (0x10000 <= c && c < 0xF0000);
  276. } // isXML11NCName(int):boolean
  277. /**
  278. * Returns whether the given character is a valid
  279. * high surrogate for a name character. This includes
  280. * all high surrogates for characters [0x10000-0xEFFFF].
  281. * In other words everything excluding planes 15 and 16.
  282. *
  283. * @param c The character to check.
  284. */
  285. public static boolean isXML11NameHighSurrogate(int c) {
  286. return (0xD800 <= c && c <= 0xDB7F);
  287. }
  288. /*
  289. * [5] Name ::= NameStartChar NameChar*
  290. */
  291. /**
  292. * Check to see if a string is a valid Name according to [5]
  293. * in the XML 1.1 Recommendation
  294. *
  295. * @param name string to check
  296. * @return true if name is a valid Name
  297. */
  298. public static boolean isXML11ValidName(String name) {
  299. int length = name.length();
  300. if (length == 0)
  301. return false;
  302. int i = 1;
  303. char ch = name.charAt(0);
  304. if( !isXML11NameStart(ch) ) {
  305. if ( length > 1 && isXML11NameHighSurrogate(ch) ) {
  306. char ch2 = name.charAt(1);
  307. if ( !XMLChar.isLowSurrogate(ch2) ||
  308. !isXML11NameStart(XMLChar.supplemental(ch, ch2)) ) {
  309. return false;
  310. }
  311. i = 2;
  312. }
  313. else {
  314. return false;
  315. }
  316. }
  317. while (i < length) {
  318. ch = name.charAt(i);
  319. if ( !isXML11Name(ch) ) {
  320. if ( ++i < length && isXML11NameHighSurrogate(ch) ) {
  321. char ch2 = name.charAt(i);
  322. if ( !XMLChar.isLowSurrogate(ch2) ||
  323. !isXML11Name(XMLChar.supplemental(ch, ch2)) ) {
  324. return false;
  325. }
  326. }
  327. else {
  328. return false;
  329. }
  330. }
  331. ++i;
  332. }
  333. return true;
  334. } // isXML11ValidName(String):boolean
  335. /*
  336. * from the namespace 1.1 rec
  337. * [4] NCName ::= NCNameStartChar NCNameChar*
  338. */
  339. /**
  340. * Check to see if a string is a valid NCName according to [4]
  341. * from the XML Namespaces 1.1 Recommendation
  342. *
  343. * @param name string to check
  344. * @return true if name is a valid NCName
  345. */
  346. public static boolean isXML11ValidNCName(String ncName) {
  347. int length = ncName.length();
  348. if (length == 0)
  349. return false;
  350. int i = 1;
  351. char ch = ncName.charAt(0);
  352. if( !isXML11NCNameStart(ch) ) {
  353. if ( length > 1 && isXML11NameHighSurrogate(ch) ) {
  354. char ch2 = ncName.charAt(1);
  355. if ( !XMLChar.isLowSurrogate(ch2) ||
  356. !isXML11NCNameStart(XMLChar.supplemental(ch, ch2)) ) {
  357. return false;
  358. }
  359. i = 2;
  360. }
  361. else {
  362. return false;
  363. }
  364. }
  365. while (i < length) {
  366. ch = ncName.charAt(i);
  367. if ( !isXML11NCName(ch) ) {
  368. if ( ++i < length && isXML11NameHighSurrogate(ch) ) {
  369. char ch2 = ncName.charAt(i);
  370. if ( !XMLChar.isLowSurrogate(ch2) ||
  371. !isXML11NCName(XMLChar.supplemental(ch, ch2)) ) {
  372. return false;
  373. }
  374. }
  375. else {
  376. return false;
  377. }
  378. }
  379. ++i;
  380. }
  381. return true;
  382. } // isXML11ValidNCName(String):boolean
  383. /*
  384. * [7] Nmtoken ::= (NameChar)+
  385. */
  386. /**
  387. * Check to see if a string is a valid Nmtoken according to [7]
  388. * in the XML 1.1 Recommendation
  389. *
  390. * @param nmtoken string to check
  391. * @return true if nmtoken is a valid Nmtoken
  392. */
  393. public static boolean isXML11ValidNmtoken(String nmtoken) {
  394. int length = nmtoken.length();
  395. if (length == 0)
  396. return false;
  397. for (int i = 0; i < length; ++i ) {
  398. char ch = nmtoken.charAt(i);
  399. if( !isXML11Name(ch) ) {
  400. if ( ++i < length && isXML11NameHighSurrogate(ch) ) {
  401. char ch2 = nmtoken.charAt(i);
  402. if ( !XMLChar.isLowSurrogate(ch2) ||
  403. !isXML11Name(XMLChar.supplemental(ch, ch2)) ) {
  404. return false;
  405. }
  406. }
  407. else {
  408. return false;
  409. }
  410. }
  411. }
  412. return true;
  413. } // isXML11ValidName(String):boolean
  414. } // class XML11Char