1. /*
  2. * The Apache Software License, Version 1.1
  3. *
  4. *
  5. * Copyright (c) 2001-2004 The Apache Software Foundation. All rights
  6. * reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. *
  12. * 1. Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. *
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in
  17. * the documentation and/or other materials provided with the
  18. * distribution.
  19. *
  20. * 3. The end-user documentation included with the redistribution,
  21. * if any, must include the following acknowledgment:
  22. * "This product includes software developed by the
  23. * Apache Software Foundation (http://www.apache.org/)."
  24. * Alternately, this acknowledgment may appear in the software itself,
  25. * if and wherever such third-party acknowledgments normally appear.
  26. *
  27. * 4. The names "Xerces" and "Apache Software Foundation" must
  28. * not be used to endorse or promote products derived from this
  29. * software without prior written permission. For written
  30. * permission, please contact apache@apache.org.
  31. *
  32. * 5. Products derived from this software may not be called "Apache",
  33. * nor may "Apache" appear in their name, without prior written
  34. * permission of the Apache Software Foundation.
  35. *
  36. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  37. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  38. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  39. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  40. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  41. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  42. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  43. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  44. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  45. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  46. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  47. * SUCH DAMAGE.
  48. * ====================================================================
  49. *
  50. * This software consists of voluntary contributions made by many
  51. * individuals on behalf of the Apache Software Foundation and was
  52. * originally based on software copyright (c) 2001, International
  53. * Business Machines, Inc., http://www.apache.org. For more
  54. * information on the Apache Software Foundation, please see
  55. * <http://www.apache.org/>.
  56. */
  57. package com.sun.org.apache.xerces.internal.impl.dv.xs;
  58. import com.sun.org.apache.xerces.internal.impl.dv.InvalidDatatypeValueException;
  59. import com.sun.org.apache.xerces.internal.util.URI;
  60. import com.sun.org.apache.xerces.internal.impl.dv.ValidationContext;
  61. /**
  62. * Represent the schema type "anyURI"
  63. *
  64. * @author Neeraj Bajaj, Sun Microsystems, inc.
  65. * @author Sandy Gao, IBM
  66. *
  67. * @version $Id: AnyURIDV.java,v 1.5 2004/01/20 17:01:53 sandygao Exp $
  68. */
  69. public class AnyURIDV extends TypeValidator {
  70. private static final URI BASE_URI;
  71. static {
  72. URI uri = null;
  73. try {
  74. uri = new URI("abc://def.ghi.jkl");
  75. } catch (URI.MalformedURIException ex) {
  76. }
  77. BASE_URI = uri;
  78. }
  79. public short getAllowedFacets(){
  80. return (XSSimpleTypeDecl.FACET_LENGTH | XSSimpleTypeDecl.FACET_MINLENGTH | XSSimpleTypeDecl.FACET_MAXLENGTH | XSSimpleTypeDecl.FACET_PATTERN | XSSimpleTypeDecl.FACET_ENUMERATION | XSSimpleTypeDecl.FACET_WHITESPACE );
  81. }
  82. // before we return string we have to make sure it is correct URI as per spec.
  83. // for some types (string and derived), they just return the string itself
  84. public Object getActualValue(String content, ValidationContext context) throws InvalidDatatypeValueException {
  85. // check 3.2.17.c0 must: URI (rfc 2396/2723)
  86. try {
  87. if( content.length() != 0 ) {
  88. // encode special characters using XLink 5.4 algorithm
  89. content = encode(content);
  90. // Support for relative URLs
  91. // According to Java 1.1: URLs may also be specified with a
  92. // String and the URL object that it is related to.
  93. new URI(BASE_URI, content );
  94. }
  95. } catch (URI.MalformedURIException ex) {
  96. throw new InvalidDatatypeValueException("cvc-datatype-valid.1.2.1", new Object[]{content, "anyURI"});
  97. }
  98. // REVISIT: do we need to return the new URI object?
  99. return content;
  100. }
  101. // which ASCII characters need to be escaped
  102. private static boolean gNeedEscaping[] = new boolean[128];
  103. // the first hex character if a character needs to be escaped
  104. private static char gAfterEscaping1[] = new char[128];
  105. // the second hex character if a character needs to be escaped
  106. private static char gAfterEscaping2[] = new char[128];
  107. private static char[] gHexChs = {'0', '1', '2', '3', '4', '5', '6', '7',
  108. '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
  109. // initialize the above 3 arrays
  110. static {
  111. for (int i = 0; i <= 0x1f; i++) {
  112. gNeedEscaping[i] = true;
  113. gAfterEscaping1[i] = gHexChs[i >> 4];
  114. gAfterEscaping2[i] = gHexChs[i & 0xf];
  115. }
  116. gNeedEscaping[0x7f] = true;
  117. gAfterEscaping1[0x7f] = '7';
  118. gAfterEscaping2[0x7f] = 'F';
  119. char[] escChs = {' ', '<', '>', '"', '{', '}',
  120. '|', '\\', '^', '~', '`'};
  121. int len = escChs.length;
  122. char ch;
  123. for (int i = 0; i < len; i++) {
  124. ch = escChs[i];
  125. gNeedEscaping[ch] = true;
  126. gAfterEscaping1[ch] = gHexChs[ch >> 4];
  127. gAfterEscaping2[ch] = gHexChs[ch & 0xf];
  128. }
  129. }
  130. // To encode special characters in anyURI, by using %HH to represent
  131. // special ASCII characters: 0x00~0x1F, 0x7F, ' ', '<', '>', etc.
  132. // and non-ASCII characters (whose value >= 128).
  133. private static String encode(String anyURI){
  134. int len = anyURI.length(), ch;
  135. StringBuffer buffer = new StringBuffer(len*3);
  136. // for each character in the anyURI
  137. int i = 0;
  138. for (; i < len; i++) {
  139. ch = anyURI.charAt(i);
  140. // if it's not an ASCII character, break here, and use UTF-8 encoding
  141. if (ch >= 128)
  142. break;
  143. if (gNeedEscaping[ch]) {
  144. buffer.append('%');
  145. buffer.append(gAfterEscaping1[ch]);
  146. buffer.append(gAfterEscaping2[ch]);
  147. }
  148. else {
  149. buffer.append((char)ch);
  150. }
  151. }
  152. // we saw some non-ascii character
  153. if (i < len) {
  154. // get UTF-8 bytes for the remaining sub-string
  155. byte[] bytes = null;
  156. byte b;
  157. try {
  158. bytes = anyURI.substring(i).getBytes("UTF-8");
  159. } catch (java.io.UnsupportedEncodingException e) {
  160. // should never happen
  161. return anyURI;
  162. }
  163. len = bytes.length;
  164. // for each byte
  165. for (i = 0; i < len; i++) {
  166. b = bytes[i];
  167. // for non-ascii character: make it positive, then escape
  168. if (b < 0) {
  169. ch = b + 256;
  170. buffer.append('%');
  171. buffer.append(gHexChs[ch >> 4]);
  172. buffer.append(gHexChs[ch & 0xf]);
  173. }
  174. else if (gNeedEscaping[b]) {
  175. buffer.append('%');
  176. buffer.append(gAfterEscaping1[b]);
  177. buffer.append(gAfterEscaping2[b]);
  178. }
  179. else {
  180. buffer.append((char)b);
  181. }
  182. }
  183. }
  184. // If encoding happened, create a new string;
  185. // otherwise, return the orginal one.
  186. if (buffer.length() != len)
  187. return buffer.toString();
  188. else
  189. return anyURI;
  190. }
  191. } // class AnyURIDV