1. /*
  2. * The Apache Software License, Version 1.1
  3. *
  4. *
  5. * Copyright (c) 1999-2003 The Apache Software Foundation. All rights
  6. * reserved.
  7. *
  8. * Redistribution and use in source and binary forms, with or without
  9. * modification, are permitted provided that the following conditions
  10. * are met:
  11. *
  12. * 1. Redistributions of source code must retain the above copyright
  13. * notice, this list of conditions and the following disclaimer.
  14. *
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in
  17. * the documentation and/or other materials provided with the
  18. * distribution.
  19. *
  20. * 3. The end-user documentation included with the redistribution,
  21. * if any, must include the following acknowledgment:
  22. * "This product includes software developed by the
  23. * Apache Software Foundation (http://www.apache.org/)."
  24. * Alternately, this acknowledgment may appear in the software itself,
  25. * if and wherever such third-party acknowledgments normally appear.
  26. *
  27. * 4. The names "Xerces" and "Apache Software Foundation" must
  28. * not be used to endorse or promote products derived from this
  29. * software without prior written permission. For written
  30. * permission, please contact apache@apache.org.
  31. *
  32. * 5. Products derived from this software may not be called "Apache",
  33. * nor may "Apache" appear in their name, without prior written
  34. * permission of the Apache Software Foundation.
  35. *
  36. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  37. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  38. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  39. * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  40. * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  41. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  42. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  43. * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  44. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  45. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  46. * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  47. * SUCH DAMAGE.
  48. * ====================================================================
  49. *
  50. * This software consists of voluntary contributions made by many
  51. * individuals on behalf of the Apache Software Foundation and was
  52. * originally based on software copyright (c) 1999, iClick Inc.,
  53. * http://www.apache.org. For more information on the Apache Software
  54. * Foundation, please see <http://www.apache.org/>.
  55. */
  56. package com.sun.org.apache.xerces.internal.util;
  57. import java.io.IOException;
  58. import java.io.Serializable;
  59. /**********************************************************************
  60. * A class to represent a Uniform Resource Identifier (URI). This class
  61. * is designed to handle the parsing of URIs and provide access to
  62. * the various components (scheme, host, port, userinfo, path, query
  63. * string and fragment) that may constitute a URI.
  64. * <p>
  65. * Parsing of a URI specification is done according to the URI
  66. * syntax described in
  67. * <a href="http://www.ietf.org/rfc/rfc2396.txt?number=2396">RFC 2396</a>,
  68. * and amended by
  69. * <a href="http://www.ietf.org/rfc/rfc2732.txt?number=2732">RFC 2732</a>.
  70. * <p>
  71. * Every absolute URI consists of a scheme, followed by a colon (':'),
  72. * followed by a scheme-specific part. For URIs that follow the
  73. * "generic URI" syntax, the scheme-specific part begins with two
  74. * slashes ("//") and may be followed by an authority segment (comprised
  75. * of user information, host, and port), path segment, query segment
  76. * and fragment. Note that RFC 2396 no longer specifies the use of the
  77. * parameters segment and excludes the "user:password" syntax as part of
  78. * the authority segment. If "user:password" appears in a URI, the entire
  79. * user/password string is stored as userinfo.
  80. * <p>
  81. * For URIs that do not follow the "generic URI" syntax (e.g. mailto),
  82. * the entire scheme-specific part is treated as the "path" portion
  83. * of the URI.
  84. * <p>
  85. * Note that, unlike the java.net.URL class, this class does not provide
  86. * any built-in network access functionality nor does it provide any
  87. * scheme-specific functionality (for example, it does not know a
  88. * default port for a specific scheme). Rather, it only knows the
  89. * grammar and basic set of operations that can be applied to a URI.
  90. *
  91. * @version $Id: URI.java,v 1.17 2004/03/28 16:12:19 mrglavas Exp $
  92. *
  93. **********************************************************************/
  94. public class URI implements Serializable {
  95. /*******************************************************************
  96. * MalformedURIExceptions are thrown in the process of building a URI
  97. * or setting fields on a URI when an operation would result in an
  98. * invalid URI specification.
  99. *
  100. ********************************************************************/
  101. public static class MalformedURIException extends IOException {
  102. /******************************************************************
  103. * Constructs a <code>MalformedURIException</code> with no specified
  104. * detail message.
  105. ******************************************************************/
  106. public MalformedURIException() {
  107. super();
  108. }
  109. /*****************************************************************
  110. * Constructs a <code>MalformedURIException</code> with the
  111. * specified detail message.
  112. *
  113. * @param p_msg the detail message.
  114. ******************************************************************/
  115. public MalformedURIException(String p_msg) {
  116. super(p_msg);
  117. }
  118. }
  119. private static final byte [] fgLookupTable = new byte[128];
  120. /**
  121. * Character Classes
  122. */
  123. /** reserved characters ;/?:@&=+$,[] */
  124. //RFC 2732 added '[' and ']' as reserved characters
  125. private static final int RESERVED_CHARACTERS = 0x01;
  126. /** URI punctuation mark characters: -_.!~*'() - these, combined with
  127. alphanumerics, constitute the "unreserved" characters */
  128. private static final int MARK_CHARACTERS = 0x02;
  129. /** scheme can be composed of alphanumerics and these characters: +-. */
  130. private static final int SCHEME_CHARACTERS = 0x04;
  131. /** userinfo can be composed of unreserved, escaped and these
  132. characters: ;:&=+$, */
  133. private static final int USERINFO_CHARACTERS = 0x08;
  134. /** ASCII letter characters */
  135. private static final int ASCII_ALPHA_CHARACTERS = 0x10;
  136. /** ASCII digit characters */
  137. private static final int ASCII_DIGIT_CHARACTERS = 0x20;
  138. /** ASCII hex characters */
  139. private static final int ASCII_HEX_CHARACTERS = 0x40;
  140. /** Path characters */
  141. private static final int PATH_CHARACTERS = 0x80;
  142. /** Mask for alpha-numeric characters */
  143. private static final int MASK_ALPHA_NUMERIC = ASCII_ALPHA_CHARACTERS | ASCII_DIGIT_CHARACTERS;
  144. /** Mask for unreserved characters */
  145. private static final int MASK_UNRESERVED_MASK = MASK_ALPHA_NUMERIC | MARK_CHARACTERS;
  146. /** Mask for URI allowable characters except for % */
  147. private static final int MASK_URI_CHARACTER = MASK_UNRESERVED_MASK | RESERVED_CHARACTERS;
  148. /** Mask for scheme characters */
  149. private static final int MASK_SCHEME_CHARACTER = MASK_ALPHA_NUMERIC | SCHEME_CHARACTERS;
  150. /** Mask for userinfo characters */
  151. private static final int MASK_USERINFO_CHARACTER = MASK_UNRESERVED_MASK | USERINFO_CHARACTERS;
  152. /** Mask for path characters */
  153. private static final int MASK_PATH_CHARACTER = MASK_UNRESERVED_MASK | PATH_CHARACTERS;
  154. static {
  155. // Add ASCII Digits and ASCII Hex Numbers
  156. for (int i = '0'; i <= '9'; ++i) {
  157. fgLookupTable[i] |= ASCII_DIGIT_CHARACTERS | ASCII_HEX_CHARACTERS;
  158. }
  159. // Add ASCII Letters and ASCII Hex Numbers
  160. for (int i = 'A'; i <= 'F'; ++i) {
  161. fgLookupTable[i] |= ASCII_ALPHA_CHARACTERS | ASCII_HEX_CHARACTERS;
  162. fgLookupTable[i+0x00000020] |= ASCII_ALPHA_CHARACTERS | ASCII_HEX_CHARACTERS;
  163. }
  164. // Add ASCII Letters
  165. for (int i = 'G'; i <= 'Z'; ++i) {
  166. fgLookupTable[i] |= ASCII_ALPHA_CHARACTERS;
  167. fgLookupTable[i+0x00000020] |= ASCII_ALPHA_CHARACTERS;
  168. }
  169. // Add Reserved Characters
  170. fgLookupTable[';'] |= RESERVED_CHARACTERS;
  171. fgLookupTable['/'] |= RESERVED_CHARACTERS;
  172. fgLookupTable['?'] |= RESERVED_CHARACTERS;
  173. fgLookupTable[':'] |= RESERVED_CHARACTERS;
  174. fgLookupTable['@'] |= RESERVED_CHARACTERS;
  175. fgLookupTable['&'] |= RESERVED_CHARACTERS;
  176. fgLookupTable['='] |= RESERVED_CHARACTERS;
  177. fgLookupTable['+'] |= RESERVED_CHARACTERS;
  178. fgLookupTable['$'] |= RESERVED_CHARACTERS;
  179. fgLookupTable[','] |= RESERVED_CHARACTERS;
  180. fgLookupTable['['] |= RESERVED_CHARACTERS;
  181. fgLookupTable[']'] |= RESERVED_CHARACTERS;
  182. // Add Mark Characters
  183. fgLookupTable['-'] |= MARK_CHARACTERS;
  184. fgLookupTable['_'] |= MARK_CHARACTERS;
  185. fgLookupTable['.'] |= MARK_CHARACTERS;
  186. fgLookupTable['!'] |= MARK_CHARACTERS;
  187. fgLookupTable['~'] |= MARK_CHARACTERS;
  188. fgLookupTable['*'] |= MARK_CHARACTERS;
  189. fgLookupTable['\''] |= MARK_CHARACTERS;
  190. fgLookupTable['('] |= MARK_CHARACTERS;
  191. fgLookupTable[')'] |= MARK_CHARACTERS;
  192. // Add Scheme Characters
  193. fgLookupTable['+'] |= SCHEME_CHARACTERS;
  194. fgLookupTable['-'] |= SCHEME_CHARACTERS;
  195. fgLookupTable['.'] |= SCHEME_CHARACTERS;
  196. // Add Userinfo Characters
  197. fgLookupTable[';'] |= USERINFO_CHARACTERS;
  198. fgLookupTable[':'] |= USERINFO_CHARACTERS;
  199. fgLookupTable['&'] |= USERINFO_CHARACTERS;
  200. fgLookupTable['='] |= USERINFO_CHARACTERS;
  201. fgLookupTable['+'] |= USERINFO_CHARACTERS;
  202. fgLookupTable['$'] |= USERINFO_CHARACTERS;
  203. fgLookupTable[','] |= USERINFO_CHARACTERS;
  204. // Add Path Characters
  205. fgLookupTable[';'] |= PATH_CHARACTERS;
  206. fgLookupTable['/'] |= PATH_CHARACTERS;
  207. fgLookupTable[':'] |= PATH_CHARACTERS;
  208. fgLookupTable['@'] |= PATH_CHARACTERS;
  209. fgLookupTable['&'] |= PATH_CHARACTERS;
  210. fgLookupTable['='] |= PATH_CHARACTERS;
  211. fgLookupTable['+'] |= PATH_CHARACTERS;
  212. fgLookupTable['$'] |= PATH_CHARACTERS;
  213. fgLookupTable[','] |= PATH_CHARACTERS;
  214. }
  215. /** Stores the scheme (usually the protocol) for this URI. */
  216. private String m_scheme = null;
  217. /** If specified, stores the userinfo for this URI; otherwise null */
  218. private String m_userinfo = null;
  219. /** If specified, stores the host for this URI; otherwise null */
  220. private String m_host = null;
  221. /** If specified, stores the port for this URI; otherwise -1 */
  222. private int m_port = -1;
  223. /** If specified, stores the registry based authority for this URI; otherwise -1 */
  224. private String m_regAuthority = null;
  225. /** If specified, stores the path for this URI; otherwise null */
  226. private String m_path = null;
  227. /** If specified, stores the query string for this URI; otherwise
  228. null. */
  229. private String m_queryString = null;
  230. /** If specified, stores the fragment for this URI; otherwise null */
  231. private String m_fragment = null;
  232. private static boolean DEBUG = false;
  233. /**
  234. * Construct a new and uninitialized URI.
  235. */
  236. public URI() {
  237. }
  /**
   * Creates a URI as a copy of another URI; every component is taken from
   * the URI passed in.
   *
   * @param p_other the URI whose components are copied (must not be null)
   */
  public URI(URI p_other) {
    this.initialize(p_other);
  }
  /**
   * Creates a URI by parsing a URI specification string without a base URI.
   * Equivalent to calling {@link #URI(URI, String)} with a null base.
   *
   * @param p_uriSpec the URI specification string (cannot be null or
   *                  empty)
   *
   * @exception MalformedURIException if p_uriSpec violates any syntax
   *                                  rules
   */
  public URI(String p_uriSpec) throws MalformedURIException {
    this((URI) null, p_uriSpec);
  }
  /**
   * Creates a URI from a base URI and a URI specification string, which may
   * be a relative URI.
   *
   * @param p_base the base URI (cannot be null if p_uriSpec is null or
   *               empty)
   * @param p_uriSpec the URI specification string (cannot be null or
   *                  empty if p_base is null)
   *
   * @exception MalformedURIException if p_uriSpec violates any syntax
   *                                  rules
   */
  public URI(URI p_base, String p_uriSpec) throws MalformedURIException {
    this.initialize(p_base, p_uriSpec);
  }
  280. /**
  281. * Construct a new URI that does not follow the generic URI syntax.
  282. * Only the scheme and scheme-specific part (stored as the path) are
  283. * initialized.
  284. *
  285. * @param p_scheme the URI scheme (cannot be null or empty)
  286. * @param p_schemeSpecificPart the scheme-specific part (cannot be
  287. * null or empty)
  288. *
  289. * @exception MalformedURIException if p_scheme violates any
  290. * syntax rules
  291. */
  292. public URI(String p_scheme, String p_schemeSpecificPart)
  293. throws MalformedURIException {
  294. if (p_scheme == null || p_scheme.trim().length() == 0) {
  295. throw new MalformedURIException(
  296. "Cannot construct URI with null/empty scheme!");
  297. }
  298. if (p_schemeSpecificPart == null ||
  299. p_schemeSpecificPart.trim().length() == 0) {
  300. throw new MalformedURIException(
  301. "Cannot construct URI with null/empty scheme-specific part!");
  302. }
  303. setScheme(p_scheme);
  304. setPath(p_schemeSpecificPart);
  305. }
  /**
   * Creates a generic-syntax URI from its components, without userinfo and
   * without a port. Delegates to the seven-argument constructor, passing
   * null for the userinfo and -1 for the port.
   *
   * @param p_scheme the URI scheme (cannot be null or empty)
   * @param p_host the hostname, IPv4 address or IPv6 reference for the URI
   * @param p_path the URI path - if the path contains '?' or '#',
   *               then the query string and/or fragment will be
   *               set from the path; however, if the query and
   *               fragment are specified both in the path and as
   *               separate parameters, an exception is thrown
   * @param p_queryString the URI query string (cannot be specified
   *                      if path is null)
   * @param p_fragment the URI fragment (cannot be specified if path
   *                   is null)
   *
   * @exception MalformedURIException if any of the parameters violates
   *                                  syntax rules or semantic rules
   */
  public URI(String p_scheme, String p_host, String p_path,
             String p_queryString, String p_fragment)
      throws MalformedURIException {
    this(p_scheme, null, p_host, -1, p_path, p_queryString, p_fragment);
  }
  332. /**
  333. * Construct a new URI that follows the generic URI syntax from its
  334. * component parts. Each component is validated for syntax and some
  335. * basic semantic checks are performed as well. See the individual
  336. * setter methods for specifics.
  337. *
  338. * @param p_scheme the URI scheme (cannot be null or empty)
  339. * @param p_userinfo the URI userinfo (cannot be specified if host
  340. * is null)
  341. * @param p_host the hostname, IPv4 address or IPv6 reference for the URI
  342. * @param p_port the URI port (may be -1 for "unspecified"; cannot
  343. * be specified if host is null)
  344. * @param p_path the URI path - if the path contains '?' or '#',
  345. * then the query string and/or fragment will be
  346. * set from the path; however, if the query and
  347. * fragment are specified both in the path and as
  348. * separate parameters, an exception is thrown
  349. * @param p_queryString the URI query string (cannot be specified
  350. * if path is null)
  351. * @param p_fragment the URI fragment (cannot be specified if path
  352. * is null)
  353. *
  354. * @exception MalformedURIException if any of the parameters violates
  355. * syntax rules or semantic rules
  356. */
  357. public URI(String p_scheme, String p_userinfo,
  358. String p_host, int p_port, String p_path,
  359. String p_queryString, String p_fragment)
  360. throws MalformedURIException {
  361. if (p_scheme == null || p_scheme.trim().length() == 0) {
  362. throw new MalformedURIException("Scheme is required!");
  363. }
  364. if (p_host == null) {
  365. if (p_userinfo != null) {
  366. throw new MalformedURIException(
  367. "Userinfo may not be specified if host is not specified!");
  368. }
  369. if (p_port != -1) {
  370. throw new MalformedURIException(
  371. "Port may not be specified if host is not specified!");
  372. }
  373. }
  374. if (p_path != null) {
  375. if (p_path.indexOf('?') != -1 && p_queryString != null) {
  376. throw new MalformedURIException(
  377. "Query string cannot be specified in path and query string!");
  378. }
  379. if (p_path.indexOf('#') != -1 && p_fragment != null) {
  380. throw new MalformedURIException(
  381. "Fragment cannot be specified in both the path and fragment!");
  382. }
  383. }
  384. setScheme(p_scheme);
  385. setHost(p_host);
  386. setPort(p_port);
  387. setUserinfo(p_userinfo);
  388. setPath(p_path);
  389. setQueryString(p_queryString);
  390. setFragment(p_fragment);
  391. }
  392. /**
  393. * Initialize all fields of this URI from another URI.
  394. *
  395. * @param p_other the URI to copy (cannot be null)
  396. */
  397. private void initialize(URI p_other) {
  398. m_scheme = p_other.getScheme();
  399. m_userinfo = p_other.getUserinfo();
  400. m_host = p_other.getHost();
  401. m_port = p_other.getPort();
  402. m_regAuthority = p_other.getRegBasedAuthority();
  403. m_path = p_other.getPath();
  404. m_queryString = p_other.getQueryString();
  405. m_fragment = p_other.getFragment();
  406. }
  407. /**
  408. * Initializes this URI from a base URI and a URI specification string.
  409. * See RFC 2396 Section 4 and Appendix B for specifications on parsing
  410. * the URI and Section 5 for specifications on resolving relative URIs
  411. * and relative paths.
  412. *
  413. * @param p_base the base URI (may be null if p_uriSpec is an absolute
  414. * URI)
  415. * @param p_uriSpec the URI spec string which may be an absolute or
  416. * relative URI (can only be null/empty if p_base
  417. * is not null)
  418. *
  419. * @exception MalformedURIException if p_base is null and p_uriSpec
  420. * is not an absolute URI or if
  421. * p_uriSpec violates syntax rules
  422. */
  423. private void initialize(URI p_base, String p_uriSpec)
  424. throws MalformedURIException {
  425. String uriSpec = p_uriSpec;
  426. int uriSpecLen = (uriSpec != null) ? uriSpec.length() : 0;
  427. if (p_base == null && uriSpecLen == 0) {
  428. throw new MalformedURIException(
  429. "Cannot initialize URI with empty parameters.");
  430. }
  431. // just make a copy of the base if spec is empty
  432. if (uriSpecLen == 0) {
  433. initialize(p_base);
  434. return;
  435. }
  436. int index = 0;
  437. // Check for scheme, which must be before '/', '?' or '#'. Also handle
  438. // names with DOS drive letters ('D:'), so 1-character schemes are not
  439. // allowed.
  440. int colonIdx = uriSpec.indexOf(':');
  441. if (colonIdx != -1) {
  442. final int searchFrom = colonIdx - 1;
  443. // search backwards starting from character before ':'.
  444. int slashIdx = uriSpec.lastIndexOf('/', searchFrom);
  445. int queryIdx = uriSpec.lastIndexOf('?', searchFrom);
  446. int fragmentIdx = uriSpec.lastIndexOf('#', searchFrom);
  447. if (colonIdx < 2 || slashIdx != -1 ||
  448. queryIdx != -1 || fragmentIdx != -1) {
  449. // A standalone base is a valid URI according to spec
  450. if (colonIdx == 0 || (p_base == null && fragmentIdx != 0)) {
  451. throw new MalformedURIException("No scheme found in URI.");
  452. }
  453. }
  454. else {
  455. initializeScheme(uriSpec);
  456. index = m_scheme.length()+1;
  457. // Neither 'scheme:' or 'scheme:#fragment' are valid URIs.
  458. if (colonIdx == uriSpecLen - 1 || uriSpec.charAt(colonIdx+1) == '#') {
  459. throw new MalformedURIException("Scheme specific part cannot be empty.");
  460. }
  461. }
  462. }
  463. else if (p_base == null && uriSpec.indexOf('#') != 0) {
  464. throw new MalformedURIException("No scheme found in URI.");
  465. }
  466. // Two slashes means we may have authority, but definitely means we're either
  467. // matching net_path or abs_path. These two productions are ambiguous in that
  468. // every net_path (except those containing an IPv6Reference) is an abs_path.
  469. // RFC 2396 resolves this ambiguity by applying a greedy left most matching rule.
  470. // Try matching net_path first, and if that fails we don't have authority so
  471. // then attempt to match abs_path.
  472. //
  473. // net_path = "//" authority [ abs_path ]
  474. // abs_path = "/" path_segments
  475. if (((index+1) < uriSpecLen) &&
  476. (uriSpec.charAt(index) == '/' && uriSpec.charAt(index+1) == '/')) {
  477. index += 2;
  478. int startPos = index;
  479. // Authority will be everything up to path, query or fragment
  480. char testChar = '\0';
  481. while (index < uriSpecLen) {
  482. testChar = uriSpec.charAt(index);
  483. if (testChar == '/' || testChar == '?' || testChar == '#') {
  484. break;
  485. }
  486. index++;
  487. }
  488. // Attempt to parse authority. If the section is an empty string
  489. // this is a valid server based authority, so set the host to this
  490. // value.
  491. if (index > startPos) {
  492. // If we didn't find authority we need to back up. Attempt to
  493. // match against abs_path next.
  494. if (!initializeAuthority(uriSpec.substring(startPos, index))) {
  495. index = startPos - 2;
  496. }
  497. }
  498. else {
  499. m_host = "";
  500. }
  501. }
  502. initializePath(uriSpec, index);
  503. // Resolve relative URI to base URI - see RFC 2396 Section 5.2
  504. // In some cases, it might make more sense to throw an exception
  505. // (when scheme is specified is the string spec and the base URI
  506. // is also specified, for example), but we're just following the
  507. // RFC specifications
  508. if (p_base != null) {
  509. // check to see if this is the current doc - RFC 2396 5.2 #2
  510. // note that this is slightly different from the RFC spec in that
  511. // we don't include the check for query string being null
  512. // - this handles cases where the urispec is just a query
  513. // string or a fragment (e.g. "?y" or "#s") -
  514. // see <http://www.ics.uci.edu/~fielding/url/test1.html> which
  515. // identified this as a bug in the RFC
  516. if (m_path.length() == 0 && m_scheme == null &&
  517. m_host == null && m_regAuthority == null) {
  518. m_scheme = p_base.getScheme();
  519. m_userinfo = p_base.getUserinfo();
  520. m_host = p_base.getHost();
  521. m_port = p_base.getPort();
  522. m_regAuthority = p_base.getRegBasedAuthority();
  523. m_path = p_base.getPath();
  524. if (m_queryString == null) {
  525. m_queryString = p_base.getQueryString();
  526. }
  527. return;
  528. }
  529. // check for scheme - RFC 2396 5.2 #3
  530. // if we found a scheme, it means absolute URI, so we're done
  531. if (m_scheme == null) {
  532. m_scheme = p_base.getScheme();
  533. }
  534. else {
  535. return;
  536. }
  537. // check for authority - RFC 2396 5.2 #4
  538. // if we found a host, then we've got a network path, so we're done
  539. if (m_host == null && m_regAuthority == null) {
  540. m_userinfo = p_base.getUserinfo();
  541. m_host = p_base.getHost();
  542. m_port = p_base.getPort();
  543. m_regAuthority = p_base.getRegBasedAuthority();
  544. }
  545. else {
  546. return;
  547. }
  548. // check for absolute path - RFC 2396 5.2 #5
  549. if (m_path.length() > 0 &&
  550. m_path.startsWith("/")) {
  551. return;
  552. }
  553. // if we get to this point, we need to resolve relative path
  554. // RFC 2396 5.2 #6
  555. String path = "";
  556. String basePath = p_base.getPath();
  557. // 6a - get all but the last segment of the base URI path
  558. if (basePath != null && basePath.length() > 0) {
  559. int lastSlash = basePath.lastIndexOf('/');
  560. if (lastSlash != -1) {
  561. path = basePath.substring(0, lastSlash+1);
  562. }
  563. }
  564. else if (m_path.length() > 0) {
  565. path = "/";
  566. }
  567. // 6b - append the relative URI path
  568. path = path.concat(m_path);
  569. // 6c - remove all "./" where "." is a complete path segment
  570. index = -1;
  571. while ((index = path.indexOf("/./")) != -1) {
  572. path = path.substring(0, index+1).concat(path.substring(index+3));
  573. }
  574. // 6d - remove "." if path ends with "." as a complete path segment
  575. if (path.endsWith("/.")) {
  576. path = path.substring(0, path.length()-1);
  577. }
  578. // 6e - remove all "<segment>/../" where "<segment>" is a complete
  579. // path segment not equal to ".."
  580. index = 1;
  581. int segIndex = -1;
  582. String tempString = null;
  583. while ((index = path.indexOf("/../", index)) > 0) {
  584. tempString = path.substring(0, path.indexOf("/../"));
  585. segIndex = tempString.lastIndexOf('/');
  586. if (segIndex != -1) {
  587. if (!tempString.substring(segIndex).equals("..")) {
  588. path = path.substring(0, segIndex+1).concat(path.substring(index+4));
  589. index = segIndex;
  590. }
  591. else
  592. index += 4;
  593. }
  594. else
  595. index += 4;
  596. }
  597. // 6f - remove ending "<segment>/.." where "<segment>" is a
  598. // complete path segment
  599. if (path.endsWith("/..")) {
  600. tempString = path.substring(0, path.length()-3);
  601. segIndex = tempString.lastIndexOf('/');
  602. if (segIndex != -1) {
  603. path = path.substring(0, segIndex+1);
  604. }
  605. }
  606. m_path = path;
  607. }
  608. }
  609. /**
  610. * Initialize the scheme for this URI from a URI string spec.
  611. *
  612. * @param p_uriSpec the URI specification (cannot be null)
  613. *
  614. * @exception MalformedURIException if URI does not have a conformant
  615. * scheme
  616. */
  617. private void initializeScheme(String p_uriSpec)
  618. throws MalformedURIException {
  619. int uriSpecLen = p_uriSpec.length();
  620. int index = 0;
  621. String scheme = null;
  622. char testChar = '\0';
  623. while (index < uriSpecLen) {
  624. testChar = p_uriSpec.charAt(index);
  625. if (testChar == ':' || testChar == '/' ||
  626. testChar == '?' || testChar == '#') {
  627. break;
  628. }
  629. index++;
  630. }
  631. scheme = p_uriSpec.substring(0, index);
  632. if (scheme.length() == 0) {
  633. throw new MalformedURIException("No scheme found in URI.");
  634. }
  635. else {
  636. setScheme(scheme);
  637. }
  638. }
  639. /**
  640. * Initialize the authority (either server or registry based)
  641. * for this URI from a URI string spec.
  642. *
  643. * @param p_uriSpec the URI specification (cannot be null)
  644. *
  645. * @return true if the given string matched server or registry
  646. * based authority
  647. */
  648. private boolean initializeAuthority(String p_uriSpec) {
  649. int index = 0;
  650. int start = 0;
  651. int end = p_uriSpec.length();
  652. char testChar = '\0';
  653. String userinfo = null;
  654. // userinfo is everything up to @
  655. if (p_uriSpec.indexOf('@', start) != -1) {
  656. while (index < end) {
  657. testChar = p_uriSpec.charAt(index);
  658. if (testChar == '@') {
  659. break;
  660. }
  661. index++;
  662. }
  663. userinfo = p_uriSpec.substring(start, index);
  664. index++;
  665. }
  666. // host is everything up to last ':', or up to
  667. // and including ']' if followed by ':'.
  668. String host = null;
  669. start = index;
  670. boolean hasPort = false;
  671. if (index < end) {
  672. if (p_uriSpec.charAt(start) == '[') {
  673. int bracketIndex = p_uriSpec.indexOf(']', start);
  674. index = (bracketIndex != -1) ? bracketIndex : end;
  675. if (index+1 < end && p_uriSpec.charAt(index+1) == ':') {
  676. ++index;
  677. hasPort = true;
  678. }
  679. else {
  680. index = end;
  681. }
  682. }
  683. else {
  684. int colonIndex = p_uriSpec.lastIndexOf(':', end);
  685. index = (colonIndex > start) ? colonIndex : end;
  686. hasPort = (index != end);
  687. }
  688. }
  689. host = p_uriSpec.substring(start, index);
  690. int port = -1;
  691. if (host.length() > 0) {
  692. // port
  693. if (hasPort) {
  694. index++;
  695. start = index;
  696. while (index < end) {
  697. index++;
  698. }
  699. String portStr = p_uriSpec.substring(start, index);
  700. if (portStr.length() > 0) {
  701. // REVISIT: Remove this code.
  702. /** for (int i = 0; i < portStr.length(); i++) {
  703. if (!isDigit(portStr.charAt(i))) {
  704. throw new MalformedURIException(
  705. portStr +
  706. " is invalid. Port should only contain digits!");
  707. }
  708. }**/
  709. // REVISIT: Remove this code.
  710. // Store port value as string instead of integer.
  711. try {
  712. port = Integer.parseInt(portStr);
  713. if (port == -1) --port;
  714. }
  715. catch (NumberFormatException nfe) {
  716. port = -2;
  717. }
  718. }
  719. }
  720. }
  721. if (isValidServerBasedAuthority(host, port, userinfo)) {
  722. m_host = host;
  723. m_port = port;
  724. m_userinfo = userinfo;
  725. return true;
  726. }
  727. // Note: Registry based authority is being removed from a
  728. // new spec for URI which would obsolete RFC 2396. If the
  729. // spec is added to XML errata, processing of reg_name
  730. // needs to be removed. - mrglavas.
  731. else if (isValidRegistryBasedAuthority(p_uriSpec)) {
  732. m_regAuthority = p_uriSpec;
  733. return true;
  734. }
  735. return false;
  736. }
  737. /**
  738. * Determines whether the components host, port, and user info
  739. * are valid as a server authority.
  740. *
  741. * @param host the host component of authority
  742. * @param port the port number component of authority
  743. * @param userinfo the user info component of authority
  744. *
  745. * @return true if the given host, port, and userinfo compose
  746. * a valid server authority
  747. */
  748. private boolean isValidServerBasedAuthority(String host, int port, String userinfo) {
  749. // Check if the host is well formed.
  750. if (!isWellFormedAddress(host)) {
  751. return false;
  752. }
  753. // Check that port is well formed if it exists.
  754. // REVISIT: There's no restriction on port value ranges, but
  755. // perform the same check as in setPort to be consistent. Pass
  756. // in a string to this method instead of an integer.
  757. if (port < -1 || port > 65535) {
  758. return false;
  759. }
  760. // Check that userinfo is well formed if it exists.
  761. if (userinfo != null) {
  762. // Userinfo can contain alphanumerics, mark characters, escaped
  763. // and ';',':','&','=','+','$',','
  764. int index = 0;
  765. int end = userinfo.length();
  766. char testChar = '\0';
  767. while (index < end) {
  768. testChar = userinfo.charAt(index);
  769. if (testChar == '%') {
  770. if (index+2 >= end ||
  771. !isHex(userinfo.charAt(index+1)) ||
  772. !isHex(userinfo.charAt(index+2))) {
  773. return false;
  774. }
  775. index += 2;
  776. }
  777. else if (!isUserinfoCharacter(testChar)) {
  778. return false;
  779. }
  780. ++index;
  781. }
  782. }
  783. return true;
  784. }
  785. /**
  786. * Determines whether the given string is a registry based authority.
  787. *
  788. * @param authority the authority component of a URI
  789. *
  790. * @return true if the given string is a registry based authority
  791. */
  792. private boolean isValidRegistryBasedAuthority(String authority) {
  793. int index = 0;
  794. int end = authority.length();
  795. char testChar;
  796. while (index < end) {
  797. testChar = authority.charAt(index);
  798. // check for valid escape sequence
  799. if (testChar == '%') {
  800. if (index+2 >= end ||
  801. !isHex(authority.charAt(index+1)) ||
  802. !isHex(authority.charAt(index+2))) {
  803. return false;
  804. }
  805. index += 2;
  806. }
  807. // can check against path characters because the set
  808. // is the same except for '/' which we've already excluded.
  809. else if (!isPathCharacter(testChar)) {
  810. return false;
  811. }
  812. ++index;
  813. }
  814. return true;
  815. }
  816. /**
  817. * Initialize the path for this URI from a URI string spec.
  818. *
  819. * @param p_uriSpec the URI specification (cannot be null)
  820. * @param p_nStartIndex the index to begin scanning from
  821. *
  822. * @exception MalformedURIException if p_uriSpec violates syntax rules
  823. */
  824. private void initializePath(String p_uriSpec, int p_nStartIndex)
  825. throws MalformedURIException {
  826. if (p_uriSpec == null) {
  827. throw new MalformedURIException(
  828. "Cannot initialize path from null string!");
  829. }
  830. int index = p_nStartIndex;
  831. int start = p_nStartIndex;
  832. int end = p_uriSpec.length();
  833. char testChar = '\0';
  834. // path - everything up to query string or fragment
  835. if (start < end) {
  836. // RFC 2732 only allows '[' and ']' to appear in the opaque part.
  837. if (getScheme() == null || p_uriSpec.charAt(start) == '/') {
  838. // Scan path.
  839. // abs_path = "/" path_segments
  840. // rel_path = rel_segment [ abs_path ]
  841. while (index < end) {
  842. testChar = p_uriSpec.charAt(index);
  843. // check for valid escape sequence
  844. if (testChar == '%') {
  845. if (index+2 >= end ||
  846. !isHex(p_uriSpec.charAt(index+1)) ||
  847. !isHex(p_uriSpec.charAt(index+2))) {
  848. throw new MalformedURIException(
  849. "Path contains invalid escape sequence!");
  850. }
  851. index += 2;
  852. }
  853. // Path segments cannot contain '[' or ']' since pchar
  854. // production was not changed by RFC 2732.
  855. else if (!isPathCharacter(testChar)) {
  856. if (testChar == '?' || testChar == '#') {
  857. break;
  858. }
  859. throw new MalformedURIException(
  860. "Path contains invalid character: " + testChar);
  861. }
  862. ++index;
  863. }
  864. }
  865. else {
  866. // Scan opaque part.
  867. // opaque_part = uric_no_slash *uric
  868. while (index < end) {
  869. testChar = p_uriSpec.charAt(index);
  870. if (testChar == '?' || testChar == '#') {
  871. break;
  872. }
  873. // check for valid escape sequence
  874. if (testChar == '%') {
  875. if (index+2 >= end ||
  876. !isHex(p_uriSpec.charAt(index+1)) ||
  877. !isHex(p_uriSpec.charAt(index+2))) {
  878. throw new MalformedURIException(
  879. "Opaque part contains invalid escape sequence!");
  880. }
  881. index += 2;
  882. }
  883. // If the scheme specific part is opaque, it can contain '['
  884. // and ']'. uric_no_slash wasn't modified by RFC 2732, which
  885. // I've interpreted as an error in the spec, since the
  886. // production should be equivalent to (uric - '/'), and uric
  887. // contains '[' and ']'. - mrglavas
  888. else if (!isURICharacter(testChar)) {
  889. throw new MalformedURIException(
  890. "Opaque part contains invalid character: " + testChar);
  891. }
  892. ++index;
  893. }
  894. }
  895. }
  896. m_path = p_uriSpec.substring(start, index);
  897. // query - starts with ? and up to fragment or end
  898. if (testChar == '?') {
  899. index++;
  900. start = index;
  901. while (index < end) {
  902. testChar = p_uriSpec.charAt(index);
  903. if (testChar == '#') {
  904. break;
  905. }
  906. if (testChar == '%') {
  907. if (index+2 >= end ||
  908. !isHex(p_uriSpec.charAt(index+1)) ||
  909. !isHex(p_uriSpec.charAt(index+2))) {
  910. throw new MalformedURIException(
  911. "Query string contains invalid escape sequence!");
  912. }
  913. index += 2;
  914. }
  915. else if (!isURICharacter(testChar)) {
  916. throw new MalformedURIException(
  917. "Query string contains invalid character: " + testChar);
  918. }
  919. index++;
  920. }
  921. m_queryString = p_uriSpec.substring(start, index);
  922. }
  923. // fragment - starts with #
  924. if (testChar == '#') {
  925. index++;
  926. start = index;
  927. while (index < end) {
  928. testChar = p_uriSpec.charAt(index);
  929. if (testChar == '%') {
  930. if (index+2 >= end ||
  931. !isHex(p_uriSpec.charAt(index+1)) ||
  932. !isHex(p_uriSpec.charAt(index+2))) {
  933. throw new MalformedURIException(
  934. "Fragment contains invalid escape sequence!");
  935. }
  936. index += 2;
  937. }
  938. else if (!isURICharacter(testChar)) {
  939. throw new MalformedURIException(
  940. "Fragment contains invalid character: "+testChar);
  941. }
  942. index++;
  943. }
  944. m_fragment = p_uriSpec.substring(start, index);
  945. }
  946. }
  947. /**
  948. * Get the scheme for this URI.
  949. *
  950. * @return the scheme for this URI
  951. */
  952. public String getScheme() {
  953. return m_scheme;
  954. }
  955. /**
  956. * Get the scheme-specific part for this URI (everything following the
  957. * scheme and the first colon). See RFC 2396 Section 5.2 for spec.
  958. *
  959. * @return the scheme-specific part for this URI
  960. */
  961. public String getSchemeSpecificPart() {
  962. StringBuffer schemespec = new StringBuffer();
  963. if (m_host != null || m_regAuthority != null) {
  964. schemespec.append("//");
  965. // Server based authority.
  966. if (m_host != null) {
  967. if (m_userinfo != null) {
  968. schemespec.append(m_userinfo);
  969. schemespec.append('@');
  970. }
  971. schemespec.append(m_host);
  972. if (m_port != -1) {
  973. schemespec.append(':');
  974. schemespec.append(m_port);
  975. }
  976. }
  977. // Registry based authority.
  978. else {
  979. schemespec.append(m_regAuthority);
  980. }
  981. }
  982. if (m_path != null) {
  983. schemespec.append((m_path));
  984. }
  985. if (m_queryString != null) {
  986. schemespec.append('?');
  987. schemespec.append(m_queryString);
  988. }
  989. if (m_fragment != null) {
  990. schemespec.append('#');
  991. schemespec.append(m_fragment);
  992. }
  993. return schemespec.toString();
  994. }
  995. /**
  996. * Get the userinfo for this URI.
  997. *
  998. * @return the userinfo for this URI (null if not specified).
  999. */
  1000. public String getUserinfo() {
  1001. return m_userinfo;
  1002. }
  1003. /**
  1004. * Get the host for this URI.
  1005. *
  1006. * @return the host for this URI (null if not specified).
  1007. */
  1008. public String getHost() {
  1009. return m_host;
  1010. }
  1011. /**
  1012. * Get the port for this URI.
  1013. *
  1014. * @return the port for this URI (-1 if not specified).
  1015. */
  1016. public int getPort() {
  1017. return m_port;
  1018. }
  1019. /**
  1020. * Get the registry based authority for this URI.
  1021. *
  1022. * @return the registry based authority (null if not specified).
  1023. */
  1024. public String getRegBasedAuthority() {
  1025. return m_regAuthority;
  1026. }
  1027. /**
  1028. * Get the path for this URI (optionally with the query string and
  1029. * fragment).
  1030. *
  1031. * @param p_includeQueryString if true (and query string is not null),
  1032. * then a "?" followed by the query string
  1033. * will be appended
  1034. * @param p_includeFragment if true (and fragment is not null),
  1035. * then a "#" followed by the fragment
  1036. * will be appended
  1037. *
  1038. * @return the path for this URI possibly including the query string
  1039. * and fragment
  1040. */
  1041. public String getPath(boolean p_includeQueryString,
  1042. boolean p_includeFragment) {
  1043. StringBuffer pathString = new StringBuffer(m_path);
  1044. if (p_includeQueryString && m_queryString != null) {
  1045. pathString.append('?');
  1046. pathString.append(m_queryString);
  1047. }
  1048. if (p_includeFragment && m_fragment != null) {
  1049. pathString.append('#');
  1050. pathString.append(m_fragment);
  1051. }
  1052. return pathString.toString();
  1053. }
  1054. /**
  1055. * Get the path for this URI. Note that the value returned is the path
  1056. * only and does not include the query string or fragment.
  1057. *
  1058. * @return the path for this URI.
  1059. */
  1060. public String getPath() {
  1061. return m_path;
  1062. }
  1063. /**
  1064. * Get the query string for this URI.
  1065. *
  1066. * @return the query string for this URI. Null is returned if there
  1067. * was no "?" in the URI spec, empty string if there was a
  1068. * "?" but no query string following it.
  1069. */
  1070. public String getQueryString() {
  1071. return m_queryString;
  1072. }
  1073. /**
  1074. * Get the fragment for this URI.
  1075. *
  1076. * @return the fragment for this URI. Null is returned if there
  1077. * was no "#" in the URI spec, empty string if there was a
  1078. * "#" but no fragment following it.
  1079. */
  1080. public String getFragment() {
  1081. return m_fragment;
  1082. }
  1083. /**
  1084. * Set the scheme for this URI. The scheme is converted to lowercase
  1085. * before it is set.
  1086. *
  1087. * @param p_scheme the scheme for this URI (cannot be null)
  1088. *
  1089. * @exception MalformedURIException if p_scheme is not a conformant
  1090. * scheme name
  1091. */
  1092. public void setScheme(String p_scheme) throws MalformedURIException {
  1093. if (p_scheme == null) {
  1094. throw new MalformedURIException(
  1095. "Cannot set scheme from null string!");
  1096. }
  1097. if (!isConformantSchemeName(p_scheme)) {
  1098. throw new MalformedURIException("The scheme is not conformant.");
  1099. }
  1100. m_scheme = p_scheme.toLowerCase();
  1101. }
  1102. /**
  1103. * Set the userinfo for this URI. If a non-null value is passed in and
  1104. * the host value is null, then an exception is thrown.
  1105. *
  1106. * @param p_userinfo the userinfo for this URI
  1107. *
  1108. * @exception MalformedURIException if p_userinfo contains invalid
  1109. * characters
  1110. */
  1111. public void setUserinfo(String p_userinfo) throws MalformedURIException {
  1112. if (p_userinfo == null) {
  1113. m_userinfo = null;
  1114. return;
  1115. }
  1116. else {
  1117. if (m_host == null) {
  1118. throw new MalformedURIException(
  1119. "Userinfo cannot be set when host is null!");
  1120. }
  1121. // userinfo can contain alphanumerics, mark characters, escaped
  1122. // and ';',':','&','=','+','$',','
  1123. int index = 0;
  1124. int end = p_userinfo.length();
  1125. char testChar = '\0';
  1126. while (index < end) {
  1127. testChar = p_userinfo.charAt(index);
  1128. if (testChar == '%') {
  1129. if (index+2 >= end ||
  1130. !isHex(p_userinfo.charAt(index+1)) ||
  1131. !isHex(p_userinfo.charAt(index+2))) {
  1132. throw new MalformedURIException(
  1133. "Userinfo contains invalid escape sequence!");
  1134. }
  1135. }
  1136. else if (!isUserinfoCharacter(testChar)) {
  1137. throw new MalformedURIException(
  1138. "Userinfo contains invalid character:"+testChar);
  1139. }
  1140. index++;
  1141. }
  1142. }
  1143. m_userinfo = p_userinfo;
  1144. }
  1145. /**
  1146. * <p>Set the host for this URI. If null is passed in, the userinfo
  1147. * field is also set to null and the port is set to -1.</p>
  1148. *
  1149. * <p>Note: This method overwrites registry based authority if it
  1150. * previously existed in this URI.</p>
  1151. *
  1152. * @param p_host the host for this URI
  1153. *
  1154. * @exception MalformedURIException if p_host is not a valid IP
  1155. * address or DNS hostname.
  1156. */
  1157. public void setHost(String p_host) throws MalformedURIException {
  1158. if (p_host == null || p_host.length() == 0) {
  1159. if (p_host != null) {
  1160. m_regAuthority = null;
  1161. }
  1162. m_host = p_host;
  1163. m_userinfo = null;
  1164. m_port = -1;
  1165. return;
  1166. }
  1167. else if (!isWellFormedAddress(p_host)) {
  1168. throw new MalformedURIException("Host is not a well formed address!");
  1169. }
  1170. m_host = p_host;
  1171. m_regAuthority = null;
  1172. }
  1173. /**
  1174. * Set the port for this URI. -1 is used to indicate that the port is
  1175. * not specified, otherwise valid port numbers are between 0 and 65535.
  1176. * If a valid port number is passed in and the host field is null,
  1177. * an exception is thrown.
  1178. *
  1179. * @param p_port the port number for this URI
  1180. *
  1181. * @exception MalformedURIException if p_port is not -1 and not a
  1182. * valid port number
  1183. */
  1184. public void setPort(int p_port) throws MalformedURIException {
  1185. if (p_port >= 0 && p_port <= 65535) {
  1186. if (m_host == null) {
  1187. throw new MalformedURIException(
  1188. "Port cannot be set when host is null!");
  1189. }
  1190. }
  1191. else if (p_port != -1) {
  1192. throw new MalformedURIException("Invalid port number!");
  1193. }
  1194. m_port = p_port;
  1195. }
  1196. /**
  1197. * <p>Sets the registry based authority for this URI.</p>
  1198. *
  1199. * <p>Note: This method overwrites server based authority
  1200. * if it previously existed in this URI.</p>
  1201. *
  1202. * @param authority the registry based authority for this URI
  1203. *
  1204. * @exception MalformedURIException it authority is not a
  1205. * well formed registry based authority
  1206. */
  1207. public void setRegBasedAuthority(String authority)
  1208. throws MalformedURIException {
  1209. if (authority == null) {
  1210. m_regAuthority = null;
  1211. return;
  1212. }
  1213. // reg_name = 1*( unreserved | escaped | "$" | "," |
  1214. // ";" | ":" | "@" | "&" | "=" | "+" )
  1215. else if (authority.length() < 1 ||
  1216. !isValidRegistryBasedAuthority(authority) ||
  1217. authority.indexOf('/') != -1) {
  1218. throw new MalformedURIException("Registry based authority is not well formed.");
  1219. }
  1220. m_regAuthority = authority;
  1221. m_host = null;
  1222. m_userinfo = null;
  1223. m_port = -1;
  1224. }
  1225. /**
  1226. * Set the path for this URI. If the supplied path is null, then the
  1227. * query string and fragment are set to null as well. If the supplied
  1228. * path includes a query string and/or fragment, these fields will be
  1229. * parsed and set as well. Note that, for URIs following the "generic
  1230. * URI" syntax, the path specified should start with a slash.
  1231. * For URIs that do not follow the generic URI syntax, this method
  1232. * sets the scheme-specific part.
  1233. *
  1234. * @param p_path the path for this URI (may be null)
  1235. *
  1236. * @exception MalformedURIException if p_path contains invalid
  1237. * characters
  1238. */
  1239. public void setPath(String p_path) throws MalformedURIException {
  1240. if (p_path == null) {
  1241. m_path = null;
  1242. m_queryString = null;
  1243. m_fragment = null;
  1244. }
  1245. else {
  1246. initializePath(p_path, 0);
  1247. }
  1248. }
  1249. /**
  1250. * Append to the end of the path of this URI. If the current path does
  1251. * not end in a slash and the path to be appended does not begin with
  1252. * a slash, a slash will be appended to the current path before the
  1253. * new segment is added. Also, if the current path ends in a slash
  1254. * and the new segment begins with a slash, the extra slash will be
  1255. * removed before the new segment is appended.
  1256. *
  1257. * @param p_addToPath the new segment to be added to the current path
  1258. *
  1259. * @exception MalformedURIException if p_addToPath contains syntax
  1260. * errors
  1261. */
  1262. public void appendPath(String p_addToPath)
  1263. throws MalformedURIException {
  1264. if (p_addToPath == null || p_addToPath.trim().length() == 0) {
  1265. return;
  1266. }
  1267. if (!isURIString(p_addToPath)) {
  1268. throw new MalformedURIException(
  1269. "Path contains invalid character!");
  1270. }
  1271. if (m_path == null || m_path.trim().length() == 0) {
  1272. if (p_addToPath.startsWith("/")) {
  1273. m_path = p_addToPath;
  1274. }
  1275. else {
  1276. m_path = "/" + p_addToPath;
  1277. }
  1278. }
  1279. else if (m_path.endsWith("/")) {
  1280. if (p_addToPath.startsWith("/")) {
  1281. m_path = m_path.concat(p_addToPath.substring(1));
  1282. }
  1283. else {
  1284. m_path = m_path.concat(p_addToPath);
  1285. }
  1286. }
  1287. else {
  1288. if (p_addToPath.startsWith("/")) {
  1289. m_path = m_path.concat(p_addToPath);
  1290. }
  1291. else {
  1292. m_path = m_path.concat("/" + p_addToPath);
  1293. }
  1294. }
  1295. }
  1296. /**
  1297. * Set the query string for this URI. A non-null value is valid only
  1298. * if this is an URI conforming to the generic URI syntax and
  1299. * the path value is not null.
  1300. *
  1301. * @param p_queryString the query string for this URI
  1302. *
  1303. * @exception MalformedURIException if p_queryString is not null and this
  1304. * URI does not conform to the generic
  1305. * URI syntax or if the path is null
  1306. */
  1307. public void setQueryString(String p_queryString) throws MalformedURIException {
  1308. if (p_queryString == null) {
  1309. m_queryString = null;
  1310. }
  1311. else if (!isGenericURI()) {
  1312. throw new MalformedURIException(
  1313. "Query string can only be set for a generic URI!");
  1314. }
  1315. else if (getPath() == null) {
  1316. throw new MalformedURIException(
  1317. "Query string cannot be set when path is null!");
  1318. }
  1319. else if (!isURIString(p_queryString)) {
  1320. throw new MalformedURIException(
  1321. "Query string contains invalid character!");
  1322. }
  1323. else {
  1324. m_queryString = p_queryString;
  1325. }
  1326. }
  1327. /**
  1328. * Set the fragment for this URI. A non-null value is valid only
  1329. * if this is a URI conforming to the generic URI syntax and
  1330. * the path value is not null.
  1331. *
  1332. * @param p_fragment the fragment for this URI
  1333. *
  1334. * @exception MalformedURIException if p_fragment is not null and this
  1335. * URI does not conform to the generic
  1336. * URI syntax or if the path is null
  1337. */
  1338. public void setFragment(String p_fragment) throws MalformedURIException {
  1339. if (p_fragment == null) {
  1340. m_fragment = null;
  1341. }
  1342. else if (!isGenericURI()) {
  1343. throw new MalformedURIException(
  1344. "Fragment can only be set for a generic URI!");
  1345. }
  1346. else if (getPath() == null) {
  1347. throw new MalformedURIException(
  1348. "Fragment cannot be set when path is null!");
  1349. }
  1350. else if (!isURIString(p_fragment)) {
  1351. throw new MalformedURIException(
  1352. "Fragment contains invalid character!");
  1353. }
  1354. else {
  1355. m_fragment = p_fragment;
  1356. }
  1357. }
  1358. /**
  1359. * Determines if the passed-in Object is equivalent to this URI.
  1360. *
  1361. * @param p_test the Object to test for equality.
  1362. *
  1363. * @return true if p_test is a URI with all values equal to this
  1364. * URI, false otherwise
  1365. */
  1366. public boolean equals(Object p_test) {
  1367. if (p_test instanceof URI) {
  1368. URI testURI = (URI) p_test;
  1369. if (((m_scheme == null && testURI.m_scheme == null) ||
  1370. (m_scheme != null && testURI.m_scheme != null &&
  1371. m_scheme.equals(testURI.m_scheme))) &&
  1372. ((m_userinfo == null && testURI.m_userinfo == null) ||
  1373. (m_userinfo != null && testURI.m_userinfo != null &&
  1374. m_userinfo.equals(testURI.m_userinfo))) &&
  1375. ((m_host == null && testURI.m_host == null) ||
  1376. (m_host != null && testURI.m_host != null &&
  1377. m_host.equals(testURI.m_host))) &&
  1378. m_port == testURI.m_port &&
  1379. ((m_path == null && testURI.m_path == null) ||
  1380. (m_path != null && testURI.m_path != null &&
  1381. m_path.equals(testURI.m_path))) &&
  1382. ((m_queryString == null && testURI.m_queryString == null) ||
  1383. (m_queryString != null && testURI.m_queryString != null &&
  1384. m_queryString.equals(testURI.m_queryString))) &&
  1385. ((m_fragment == null && testURI.m_fragment == null) ||
  1386. (m_fragment != null && testURI.m_fragment != null &&
  1387. m_fragment.equals(testURI.m_fragment)))) {
  1388. return true;
  1389. }
  1390. }
  1391. return false;
  1392. }
  1393. /**
  1394. * Get the URI as a string specification. See RFC 2396 Section 5.2.
  1395. *
  1396. * @return the URI string specification
  1397. */
  1398. public String toString() {
  1399. StringBuffer uriSpecString = new StringBuffer();
  1400. if (m_scheme != null) {
  1401. uriSpecString.append(m_scheme);
  1402. uriSpecString.append(':');
  1403. }
  1404. uriSpecString.append(getSchemeSpecificPart());
  1405. return uriSpecString.toString();
  1406. }
  1407. /**
  1408. * Get the indicator as to whether this URI uses the "generic URI"
  1409. * syntax.
  1410. *
  1411. * @return true if this URI uses the "generic URI" syntax, false
  1412. * otherwise
  1413. */
  1414. public boolean isGenericURI() {
  1415. // presence of the host (whether valid or empty) means
  1416. // double-slashes which means generic uri
  1417. return (m_host != null);
  1418. }
  1419. /**
  1420. * Determine whether a scheme conforms to the rules for a scheme name.
  1421. * A scheme is conformant if it starts with an alphanumeric, and
  1422. * contains only alphanumerics, '+','-' and '.'.
  1423. *
  1424. * @return true if the scheme is conformant, false otherwise
  1425. */
  1426. public static boolean isConformantSchemeName(String p_scheme) {
  1427. if (p_scheme == null || p_scheme.trim().length() == 0) {
  1428. return false;
  1429. }
  1430. if (!isAlpha(p_scheme.charAt(0))) {
  1431. return false;
  1432. }
  1433. char testChar;
  1434. int schemeLength = p_scheme.length();
  1435. for (int i = 1; i < schemeLength; ++i) {
  1436. testChar = p_scheme.charAt(i);
  1437. if (!isSchemeCharacter(testChar)) {
  1438. return false;
  1439. }
  1440. }
  1441. return true;
  1442. }
  1443. /**
  1444. * Determine whether a string is syntactically capable of representing
  1445. * a valid IPv4 address, IPv6 reference or the domain name of a network host.
  1446. * A valid IPv4 address consists of four decimal digit groups separated by a
  1447. * '.'. Each group must consist of one to three digits. See RFC 2732 Section 3,
  1448. * and RFC 2373 Section 2.2, for the definition of IPv6 references. A hostname
  1449. * consists of domain labels (each of which must begin and end with an alphanumeric
  1450. * but may contain '-') separated & by a '.'. See RFC 2396 Section 3.2.2.
  1451. *
  1452. * @return true if the string is a syntactically valid IPv4 address,
  1453. * IPv6 reference or hostname
  1454. */
  1455. public static boolean isWellFormedAddress(String address) {
  1456. if (address == null) {
  1457. return false;
  1458. }
  1459. int addrLength = address.length();
  1460. if (addrLength == 0) {
  1461. return false;
  1462. }
  1463. // Check if the host is a valid IPv6reference.
  1464. if (address.startsWith("[")) {
  1465. return isWellFormedIPv6Reference(address);
  1466. }
  1467. // Cannot start with a '.', '-', or end with a '-'.
  1468. if (address.startsWith(".") ||
  1469. address.startsWith("-") ||
  1470. address.endsWith("-")) {
  1471. return false;
  1472. }
  1473. // rightmost domain label starting with digit indicates IP address
  1474. // since top level domain label can only start with an alpha
  1475. // see RFC 2396 Section 3.2.2
  1476. int index = address.lastIndexOf('.');
  1477. if (address.endsWith(".")) {
  1478. index = address.substring(0, index).lastIndexOf('.');
  1479. }
  1480. if (index+1 < addrLength && isDigit(address.charAt(index+1))) {
  1481. return isWellFormedIPv4Address(address);
  1482. }
  1483. else {
  1484. // hostname = *( domainlabel "." ) toplabel [ "." ]
  1485. // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
  1486. // toplabel = alpha | alpha *( alphanum | "-" ) alphanum
  1487. // RFC 2396 states that hostnames take the form described in
  1488. // RFC 1034 (Section 3) and RFC 1123 (Section 2.1). According
  1489. // to RFC 1034, hostnames are limited to 255 characters.
  1490. if (addrLength > 255) {
  1491. return false;
  1492. }
  1493. // domain labels can contain alphanumerics and '-"
  1494. // but must start and end with an alphanumeric
  1495. char testChar;
  1496. int labelCharCount = 0;
  1497. for (int i = 0; i < addrLength; i++) {
  1498. testChar = address.charAt(i);
  1499. if (testChar == '.') {
  1500. if (!isAlphanum(address.charAt(i-1))) {
  1501. return false;
  1502. }
  1503. if (i+1 < addrLength && !isAlphanum(address.charAt(i+1))) {
  1504. return false;
  1505. }
  1506. labelCharCount = 0;
  1507. }
  1508. else if (!isAlphanum(testChar) && testChar != '-') {
  1509. return false;
  1510. }
  1511. // RFC 1034: Labels must be 63 characters or less.
  1512. else if (++labelCharCount > 63) {
  1513. return false;
  1514. }
  1515. }
  1516. }
  1517. return true;
  1518. }
  1519. /**
  1520. * <p>Determines whether a string is an IPv4 address as defined by
  1521. * RFC 2373, and under the further constraint that it must be a 32-bit
  1522. * address. Though not expressed in the grammar, in order to satisfy
  1523. * the 32-bit address constraint, each segment of the address cannot
  1524. * be greater than 255 (8 bits of information).</p>
  1525. *
  1526. * <p><code>IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT</code></p>
  1527. *
  1528. * @return true if the string is a syntactically valid IPv4 address
  1529. */
  1530. public static boolean isWellFormedIPv4Address(String address) {
  1531. int addrLength = address.length();
  1532. char testChar;
  1533. int numDots = 0;
  1534. int numDigits = 0;
  1535. // make sure that 1) we see only digits and dot separators, 2) that
  1536. // any dot separator is preceded and followed by a digit and
  1537. // 3) that we find 3 dots
  1538. //
  1539. // RFC 2732 amended RFC 2396 by replacing the definition
  1540. // of IPv4address with the one defined by RFC 2373. - mrglavas
  1541. //
  1542. // IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
  1543. //
  1544. // One to three digits must be in each segment.
  1545. for (int i = 0; i < addrLength; i++) {
  1546. testChar = address.charAt(i);
  1547. if (testChar == '.') {
  1548. if ((i > 0 && !isDigit(address.charAt(i-1))) ||
  1549. (i+1 < addrLength && !isDigit(address.charAt(i+1)))) {
  1550. return false;
  1551. }
  1552. numDigits = 0;
  1553. if (++numDots > 3) {
  1554. return false;
  1555. }
  1556. }
  1557. else if (!isDigit(testChar)) {
  1558. return false;
  1559. }
  1560. // Check that that there are no more than three digits
  1561. // in this segment.
  1562. else if (++numDigits > 3) {
  1563. return false;
  1564. }
  1565. // Check that this segment is not greater than 255.
  1566. else if (numDigits == 3) {
  1567. char first = address.charAt(i-2);
  1568. char second = address.charAt(i-1);
  1569. if (!(first < '2' ||
  1570. (first == '2' &&
  1571. (second < '5' ||
  1572. (second == '5' && testChar <= '5'))))) {
  1573. return false;
  1574. }
  1575. }
  1576. }
  1577. return (numDots == 3);
  1578. }
  1579. /**
  1580. * <p>Determines whether a string is an IPv6 reference as defined
  1581. * by RFC 2732, where IPv6address is defined in RFC 2373. The
  1582. * IPv6 address is parsed according to Section 2.2 of RFC 2373,
  1583. * with the additional constraint that the address be composed of
  1584. * 128 bits of information.</p>
  1585. *
  1586. * <p><code>IPv6reference = "[" IPv6address "]"</code></p>
  1587. *
  1588. * <p>Note: The BNF expressed in RFC 2373 Appendix B does not
  1589. * accurately describe section 2.2, and was in fact removed from
  1590. * RFC 3513, the successor of RFC 2373.</p>
  1591. *
  1592. * @return true if the string is a syntactically valid IPv6 reference
  1593. */
  1594. public static boolean isWellFormedIPv6Reference(String address) {
  1595. int addrLength = address.length();
  1596. int index = 1;
  1597. int end = addrLength-1;
  1598. // Check if string is a potential match for IPv6reference.
  1599. if (!(addrLength > 2 && address.charAt(0) == '['
  1600. && address.charAt(end) == ']')) {
  1601. return false;
  1602. }
  1603. // Counter for the number of 16-bit sections read in the address.
  1604. int [] counter = new int[1];
  1605. // Scan hex sequence before possible '::' or IPv4 address.
  1606. index = scanHexSequence(address, index, end, counter);
  1607. if (index == -1) {
  1608. return false;
  1609. }
  1610. // Address must contain 128-bits of information.
  1611. else if (index == end) {
  1612. return (counter[0] == 8);
  1613. }
  1614. if (index+1 < end && address.charAt(index) == ':') {
  1615. if (address.charAt(index+1) == ':') {
  1616. // '::' represents at least one 16-bit group of zeros.
  1617. if (++counter[0] > 8) {
  1618. return false;
  1619. }
  1620. index += 2;
  1621. // Trailing zeros will fill out the rest of the address.
  1622. if (index == end) {
  1623. return true;
  1624. }
  1625. }
  1626. // If the second character wasn't ':', in order to be valid,
  1627. // the remainder of the string must match IPv4Address,
  1628. // and we must have read exactly 6 16-bit groups.
  1629. else {
  1630. return (counter[0] == 6) &&
  1631. isWellFormedIPv4Address(address.substring(index+1, end));
  1632. }
  1633. }
  1634. else {
  1635. return false;
  1636. }
  1637. // 3. Scan hex sequence after '::'.
  1638. int prevCount = counter[0];
  1639. index = scanHexSequence(address, index, end, counter);
  1640. // We've either reached the end of the string, the address ends in
  1641. // an IPv4 address, or it is invalid. scanHexSequence has already
  1642. // made sure that we have the right number of bits.
  1643. return (index == end) ||
  1644. (index != -1 && isWellFormedIPv4Address(
  1645. address.substring((counter[0] > prevCount) ? index+1 : index, end)));
  1646. }
  1647. /**
  1648. * Helper method for isWellFormedIPv6Reference which scans the
  1649. * hex sequences of an IPv6 address. It returns the index of the
  1650. * next character to scan in the address, or -1 if the string
  1651. * cannot match a valid IPv6 address.
  1652. *
  1653. * @param address the string to be scanned
  1654. * @param index the beginning index (inclusive)
  1655. * @param end the ending index (exclusive)
  1656. * @param counter a counter for the number of 16-bit sections read
  1657. * in the address
  1658. *
  1659. * @return the index of the next character to scan, or -1 if the
  1660. * string cannot match a valid IPv6 address
  1661. */
  1662. private static int scanHexSequence (String address, int index, int end, int [] counter) {
  1663. char testChar;
  1664. int numDigits = 0;
  1665. int start = index;
  1666. // Trying to match the following productions:
  1667. // hexseq = hex4 *( ":" hex4)
  1668. // hex4 = 1*4HEXDIG
  1669. for (; index < end; ++index) {
  1670. testChar = address.charAt(index);
  1671. if (testChar == ':') {
  1672. // IPv6 addresses are 128-bit, so there can be at most eight sections.
  1673. if (numDigits > 0 && ++counter[0] > 8) {
  1674. return -1;
  1675. }
  1676. // This could be '::'.
  1677. if (numDigits == 0 || ((index+1 < end) && address.charAt(index+1) == ':')) {
  1678. return index;
  1679. }
  1680. numDigits = 0;
  1681. }
  1682. // This might be invalid or an IPv4address. If it's potentially an IPv4address,
  1683. // backup to just after the last valid character that matches hexseq.
  1684. else if (!isHex(testChar)) {
  1685. if (testChar == '.' && numDigits < 4 && numDigits > 0 && counter[0] <= 6) {
  1686. int back = index - numDigits - 1;
  1687. return (back >= start) ? back : (back+1);
  1688. }
  1689. return -1;
  1690. }
  1691. // There can be at most 4 hex digits per group.
  1692. else if (++numDigits > 4) {
  1693. return -1;
  1694. }
  1695. }
  1696. return (numDigits > 0 && ++counter[0] <= 8) ? end : -1;
  1697. }
  1698. /**
  1699. * Determine whether a char is a digit.
  1700. *
  1701. * @return true if the char is betweeen '0' and '9', false otherwise
  1702. */
  1703. private static boolean isDigit(char p_char) {
  1704. return p_char >= '0' && p_char <= '9';
  1705. }
  1706. /**
  1707. * Determine whether a character is a hexadecimal character.
  1708. *
  1709. * @return true if the char is betweeen '0' and '9', 'a' and 'f'
  1710. * or 'A' and 'F', false otherwise
  1711. */
  1712. private static boolean isHex(char p_char) {
  1713. return (p_char <= 'f' && (fgLookupTable[p_char] & ASCII_HEX_CHARACTERS) != 0);
  1714. }
  1715. /**
  1716. * Determine whether a char is an alphabetic character: a-z or A-Z
  1717. *
  1718. * @return true if the char is alphabetic, false otherwise
  1719. */
  1720. private static boolean isAlpha(char p_char) {
  1721. return ((p_char >= 'a' && p_char <= 'z') || (p_char >= 'A' && p_char <= 'Z' ));
  1722. }
  1723. /**
  1724. * Determine whether a char is an alphanumeric: 0-9, a-z or A-Z
  1725. *
  1726. * @return true if the char is alphanumeric, false otherwise
  1727. */
  1728. private static boolean isAlphanum(char p_char) {
  1729. return (p_char <= 'z' && (fgLookupTable[p_char] & MASK_ALPHA_NUMERIC) != 0);
  1730. }
  1731. /**
  1732. * Determine whether a character is a reserved character:
  1733. * ';', '/', '?', ':', '@', '&', '=', '+', '$', ',', '[', or ']'
  1734. *
  1735. * @return true if the string contains any reserved characters
  1736. */
  1737. private static boolean isReservedCharacter(char p_char) {
  1738. return (p_char <= ']' && (fgLookupTable[p_char] & RESERVED_CHARACTERS) != 0);
  1739. }
  1740. /**
  1741. * Determine whether a char is an unreserved character.
  1742. *
  1743. * @return true if the char is unreserved, false otherwise
  1744. */
  1745. private static boolean isUnreservedCharacter(char p_char) {
  1746. return (p_char <= '~' && (fgLookupTable[p_char] & MASK_UNRESERVED_MASK) != 0);
  1747. }
  1748. /**
  1749. * Determine whether a char is a URI character (reserved or
  1750. * unreserved, not including '%' for escaped octets).
  1751. *
  1752. * @return true if the char is a URI character, false otherwise
  1753. */
  1754. private static boolean isURICharacter (char p_char) {
  1755. return (p_char <= '~' && (fgLookupTable[p_char] & MASK_URI_CHARACTER) != 0);
  1756. }
  1757. /**
  1758. * Determine whether a char is a scheme character.
  1759. *
  1760. * @return true if the char is a scheme character, false otherwise
  1761. */
  1762. private static boolean isSchemeCharacter (char p_char) {
  1763. return (p_char <= 'z' && (fgLookupTable[p_char] & MASK_SCHEME_CHARACTER) != 0);
  1764. }
  1765. /**
  1766. * Determine whether a char is a userinfo character.
  1767. *
  1768. * @return true if the char is a userinfo character, false otherwise
  1769. */
  1770. private static boolean isUserinfoCharacter (char p_char) {
  1771. return (p_char <= 'z' && (fgLookupTable[p_char] & MASK_USERINFO_CHARACTER) != 0);
  1772. }
  1773. /**
  1774. * Determine whether a char is a path character.
  1775. *
  1776. * @return true if the char is a path character, false otherwise
  1777. */
  1778. private static boolean isPathCharacter (char p_char) {
  1779. return (p_char <= '~' && (fgLookupTable[p_char] & MASK_PATH_CHARACTER) != 0);
  1780. }
  1781. /**
  1782. * Determine whether a given string contains only URI characters (also
  1783. * called "uric" in RFC 2396). uric consist of all reserved
  1784. * characters, unreserved characters and escaped characters.
  1785. *
  1786. * @return true if the string is comprised of uric, false otherwise
  1787. */
  1788. private static boolean isURIString(String p_uric) {
  1789. if (p_uric == null) {
  1790. return false;
  1791. }
  1792. int end = p_uric.length();
  1793. char testChar = '\0';
  1794. for (int i = 0; i < end; i++) {
  1795. testChar = p_uric.charAt(i);
  1796. if (testChar == '%') {
  1797. if (i+2 >= end ||
  1798. !isHex(p_uric.charAt(i+1)) ||
  1799. !isHex(p_uric.charAt(i+2))) {
  1800. return false;
  1801. }
  1802. else {
  1803. i += 2;
  1804. continue;
  1805. }
  1806. }
  1807. if (isURICharacter(testChar)) {
  1808. continue;
  1809. }
  1810. else {
  1811. return false;
  1812. }
  1813. }
  1814. return true;
  1815. }
  1816. }