1. /*
  2. * $Header: /home/cvs/jakarta-commons/httpclient/src/java/org/apache/commons/httpclient/URI.java,v 1.47 2004/05/13 04:03:25 mbecke Exp $
  3. * $Revision: 1.47 $
  4. * $Date: 2004/05/13 04:03:25 $
  5. *
  6. * ====================================================================
  7. *
  8. * Copyright 2002-2004 The Apache Software Foundation
  9. *
  10. * Licensed under the Apache License, Version 2.0 (the "License");
  11. * you may not use this file except in compliance with the License.
  12. * You may obtain a copy of the License at
  13. *
  14. * http://www.apache.org/licenses/LICENSE-2.0
  15. *
  16. * Unless required by applicable law or agreed to in writing, software
  17. * distributed under the License is distributed on an "AS IS" BASIS,
  18. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  19. * See the License for the specific language governing permissions and
  20. * limitations under the License.
  21. * ====================================================================
  22. *
  23. * This software consists of voluntary contributions made by many
  24. * individuals on behalf of the Apache Software Foundation. For more
  25. * information on the Apache Software Foundation, please see
  26. * <http://www.apache.org/>.
  27. *
  28. */
  29. package org.apache.commons.httpclient;
  30. import java.io.IOException;
  31. import java.io.ObjectInputStream;
  32. import java.io.ObjectOutputStream;
  33. import java.io.Serializable;
  34. import java.util.Locale;
  35. import java.util.BitSet;
  36. import java.util.Hashtable;
  37. import org.apache.commons.codec.DecoderException;
  38. import org.apache.commons.codec.net.URLCodec;
  39. import org.apache.commons.httpclient.util.EncodingUtil;
  40. /**
  41. * The interface for the URI(Uniform Resource Identifiers) version of RFC 2396.
  42. * This class has the purpose of supporting the parsing of a URI reference to
  43. * extend any specific protocols, the character encoding of the protocol to
  44. * be transported and the charset of the document.
  45. * <p>
  46. * A URI is always in an "escaped" form, since escaping or unescaping a
  47. * completed URI might change its semantics.
  48. * <p>
  49. * Implementers should be careful not to escape or unescape the same string
  50. * more than once, since unescaping an already unescaped string might lead to
  51. * misinterpreting a percent data character as another escaped character,
  52. * or vice versa in the case of escaping an already escaped string.
  53. * <p>
  54. * In order to avoid these problems, data types are used as follows:
  55. * <p><blockquote><pre>
  56. * URI character sequence: char
  57. * octet sequence: byte
  58. * original character sequence: String
  59. * </pre></blockquote><p>
  60. *
  61. * So, a URI is a sequence of characters as an array of a char type, which
  62. * is not always represented as a sequence of octets as an array of byte.
  63. * <p>
  64. *
  65. * URI Syntactic Components
  66. * <p><blockquote><pre>
  67. * - In general, written as follows:
  68. * Absolute URI = <scheme>:<scheme-specific-part>
  69. * Generic URI = <scheme>://<authority><path>?<query>
  70. *
  71. * - Syntax
  72. * absoluteURI = scheme ":" ( hier_part | opaque_part )
  73. * hier_part = ( net_path | abs_path ) [ "?" query ]
  74. * net_path = "//" authority [ abs_path ]
  75. * abs_path = "/" path_segments
  76. * </pre></blockquote><p>
  77. *
  78. * The following examples illustrate URI that are in common use.
  79. * <pre>
  80. * ftp://ftp.is.co.za/rfc/rfc1808.txt
  81. * -- ftp scheme for File Transfer Protocol services
  82. * gopher://spinaltap.micro.umn.edu/00/Weather/California/Los%20Angeles
  83. * -- gopher scheme for Gopher and Gopher+ Protocol services
  84. * http://www.math.uio.no/faq/compression-faq/part1.html
  85. * -- http scheme for Hypertext Transfer Protocol services
  86. * mailto:mduerst@ifi.unizh.ch
  87. * -- mailto scheme for electronic mail addresses
  88. * news:comp.infosystems.www.servers.unix
  89. * -- news scheme for USENET news groups and articles
  90. * telnet://melvyl.ucop.edu/
  91. * -- telnet scheme for interactive services via the TELNET Protocol
  92. * </pre>
  93. * Please, notice that there are many modifications from URL(RFC 1738) and
  94. * relative URL(RFC 1808).
  95. * <p>
  96. * <b>The expressions for a URI</b>
  97. * <p><pre>
  98. * For escaped URI forms
  99. * - URI(char[]) // constructor
  100. * - char[] getRawXxx() // method
  101. * - String getEscapedXxx() // method
  102. * - String toString() // method
  103. * <p>
  104. * For unescaped URI forms
  105. * - URI(String) // constructor
  106. * - String getXXX() // method
  107. * </pre><p>
  108. *
  109. * @author <a href="mailto:jericho@apache.org">Sung-Gu</a>
  110. * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
  111. * @version $Revision: 1.47 $ $Date: 2002/03/14 15:14:01
  112. */
  113. public class URI implements Cloneable, Comparable, Serializable {
  114. // ----------------------------------------------------------- Constructors
  /**
   * Creates an empty instance for internal use; every field keeps its
   * declared default value.
   */
  protected URI() {
      // nothing to initialise
  }
  /**
   * Builds a URI by parsing the given string, using the given charset as
   * the protocol charset of this instance. The string may be supplied in
   * either escaped or unescaped form.
   *
   * @param s the URI character sequence
   * @param escaped <tt>true</tt> if <code>s</code> is already in escaped
   *        form, <tt>false</tt> otherwise
   * @param charset the charset used for escape encoding, if required
   *
   * @throws URIException if the URI cannot be created
   * @throws NullPointerException if the input string is <code>null</code>
   *
   * @see #getProtocolCharset
   *
   * @since 3.0
   */
  public URI(String s, boolean escaped, String charset)
          throws URIException, NullPointerException {
      // the charset has to be in place before the reference is parsed
      this.protocolCharset = charset;
      parseUriReference(s, escaped);
  }
  /**
   * Builds a URI by parsing the given string, which may be supplied in
   * either escaped or unescaped form. No protocol charset is assigned to
   * this instance by this constructor.
   *
   * @param s the URI character sequence
   * @param escaped <tt>true</tt> if <code>s</code> is already in escaped
   *        form, <tt>false</tt> otherwise
   *
   * @throws URIException if the URI cannot be created
   * @throws NullPointerException if the input string is <code>null</code>
   *
   * @see #getProtocolCharset
   *
   * @since 3.0
   */
  public URI(String s, boolean escaped)
          throws URIException, NullPointerException {
      this.parseUriReference(s, escaped);
  }
  158. /**
  159. * Construct a URI as an escaped form of a character array with the given
  160. * charset.
  161. *
  162. * @param escaped the URI character sequence
  163. * @param charset the charset string to do escape encoding
  164. * @throws URIException If the URI cannot be created.
  165. * @throws NullPointerException if <code>escaped</code> is <code>null</code>
  166. * @see #getProtocolCharset
  167. *
  168. * @deprecated Use #URI(String, boolean, String)
  169. */
  170. public URI(char[] escaped, String charset)
  171. throws URIException, NullPointerException {
  172. protocolCharset = charset;
  173. parseUriReference(new String(escaped), true);
  174. }
  /**
   * Builds a URI from an already escaped character array.
   * A URI can be placed within double-quotes or angle brackets like
   * "http://test.com/" and &lt;http://test.com/&gt;
   *
   * @param escaped the escaped URI character sequence
   * @throws URIException if the URI cannot be created
   * @throws NullPointerException if <code>escaped</code> is <code>null</code>
   * @see #getDefaultProtocolCharset
   *
   * @deprecated Use {@link #URI(String, boolean)}
   */
  public URI(char[] escaped)
          throws URIException, NullPointerException {
      // the array is copied into a string and parsed as escaped input
      String reference = String.valueOf(escaped);
      parseUriReference(reference, true);
  }
  /**
   * Builds a URI from the given unescaped string, using the given charset
   * as the protocol charset of this instance.
   *
   * @param original the string to be turned into a URI character sequence;
   *        it is either an absoluteURI or a relativeURI
   * @param charset the charset used for escape encoding
   * @throws URIException if the URI cannot be created
   * @see #getProtocolCharset
   *
   * @deprecated Use {@link #URI(String, boolean, String)}
   */
  public URI(String original, String charset) throws URIException {
      this.protocolCharset = charset;
      // the input is treated as not yet escaped
      parseUriReference(original, false);
  }
  /**
   * Builds a URI from the given unescaped string.
   * <p><blockquote><pre>
   *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
   * </pre></blockquote><p>
   * A URI can be placed within double-quotes or angle brackets like
   * "http://test.com/" and &lt;http://test.com/&gt;
   *
   * @param original the string to be turned into a URI character sequence;
   *        it is either an absoluteURI or a relativeURI
   * @throws URIException if the URI cannot be created
   * @see #getDefaultProtocolCharset
   *
   * @deprecated Use {@link #URI(String, boolean)}
   */
  public URI(String original) throws URIException {
      // the input is treated as not yet escaped
      this.parseUriReference(original, false);
  }
  /**
   * Constructs an opaque absolute URI from the given components.
   * <p><blockquote><pre>
   *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
   *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
   *   opaque_part   = uric_no_slash *uric
   * </pre></blockquote><p>
   * It's for absolute URI = &lt;scheme&gt;:&lt;scheme-specific-part&gt;#
   * &lt;fragment&gt;.
   *
   * @param scheme the scheme string; required, it is lower-cased before
   *        being validated
   * @param schemeSpecificPart the scheme specific part, encoded with the
   *        protocol charset
   * @param fragment the fragment string, or <code>null</code> if the URI
   *        has no fragment
   * @throws URIException if the scheme is missing or invalid, or the URI
   *         cannot be created
   * @see #getDefaultProtocolCharset
   */
  public URI(String scheme, String schemeSpecificPart, String fragment)
          throws URIException {
      // validate and construct the URI character sequence
      if (scheme == null) {
          throw new URIException(URIException.PARSING, "scheme required");
      }
      // Lower-case with a fixed locale: the default-locale variant maps 'I'
      // to a non-ASCII character under e.g. a Turkish locale, which would
      // make an otherwise valid scheme fail validation.
      char[] s = scheme.toLowerCase(Locale.ENGLISH).toCharArray();
      if (!validate(s, URI.scheme)) {
          throw new URIException(URIException.PARSING, "incorrect scheme");
      }
      _scheme = s; // is_absoluteURI
      _opaque = encode(schemeSpecificPart, allowed_opaque_part,
              getProtocolCharset());
      // Set flag
      _is_opaque_part = true;
      // the fragment is optional, so a null fragment must not be dereferenced
      _fragment = (fragment == null) ? null : fragment.toCharArray();
      setURI();
  }
  /**
   * Constructs a general URI by assembling the given components into a
   * single unescaped reference string and parsing it.
   * <p><blockquote><pre>
   *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
   *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
   *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
   *   hier_part     = ( net_path | abs_path ) [ "?" query ]
   * </pre></blockquote><p>
   * It's for absolute URI = &lt;scheme&gt;:&lt;path&gt;?&lt;query&gt;#&lt;
   * fragment&gt; and relative URI = &lt;path&gt;?&lt;query&gt;#&lt;fragment
   * &gt;.
   * <p>
   * Every component is optional; a <code>null</code> component is left out.
   * An empty path is accepted.
   *
   * @param scheme the scheme string
   * @param authority the authority string
   * @param path the path string; must start with "/" whenever a scheme or
   *        an authority is given
   * @param query the query string
   * @param fragment the fragment string
   * @throws URIException if the path is not absolute although a scheme or an
   *         authority is given, or the new URI cannot be created
   * @see #getDefaultProtocolCharset
   */
  public URI(String scheme, String authority, String path, String query,
          String fragment) throws URIException {
      // a path that follows a scheme or an authority has to be absolute
      boolean hasPrefix = (scheme != null) || (authority != null);
      if (path != null && hasPrefix && !path.startsWith("/")) {
          throw new URIException(URIException.PARSING,
                  "abs_path requested");
      }

      // assemble the URI character sequence, component by component
      StringBuffer reference = new StringBuffer();
      if (scheme != null) {
          reference.append(scheme).append(':');
      }
      if (authority != null) {
          reference.append("//").append(authority);
      }
      if (path != null) {
          reference.append(path);
      }
      if (query != null) {
          reference.append('?').append(query);
      }
      if (fragment != null) {
          reference.append('#').append(fragment);
      }
      parseUriReference(reference.toString(), false);
  }
  /**
   * Constructs a general URI from the given components, without a path,
   * query or fragment.
   *
   * @param scheme the scheme string
   * @param userinfo the userinfo string
   * @param host the host string
   * @param port the port number
   * @throws URIException if the new URI cannot be created
   * @see #getDefaultProtocolCharset
   */
  public URI(String scheme, String userinfo, String host, int port)
          throws URIException {
      // path, query and fragment are all absent
      this(scheme, userinfo, host, port, null, null, null);
  }
  /**
   * Constructs a general URI from the given components, without a query
   * or fragment.
   *
   * @param scheme the scheme string
   * @param userinfo the userinfo string
   * @param host the host string
   * @param port the port number
   * @param path the path string
   * @throws URIException if the new URI cannot be created
   * @see #getDefaultProtocolCharset
   */
  public URI(String scheme, String userinfo, String host, int port,
          String path) throws URIException {
      // query and fragment are absent
      this(scheme, userinfo, host, port, path, null, null);
  }
  /**
   * Constructs a general URI from the given components, without a
   * fragment.
   *
   * @param scheme the scheme string
   * @param userinfo the userinfo string
   * @param host the host string
   * @param port the port number
   * @param path the path string
   * @param query the query string
   * @throws URIException if the new URI cannot be created
   * @see #getDefaultProtocolCharset
   */
  public URI(String scheme, String userinfo, String host, int port,
          String path, String query) throws URIException {
      // the fragment is absent
      this(scheme, userinfo, host, port, path, query, null);
  }
  /**
   * Constructs a general URI from the given components. The userinfo,
   * host and port are combined into an authority string, which is
   * <code>null</code> when no host is given.
   *
   * @param scheme the scheme string
   * @param userinfo the userinfo string
   * @param host the host string
   * @param port the port number, or <code>-1</code> for no port
   * @param path the path string
   * @param query the query string
   * @param fragment the fragment string
   * @throws URIException if the new URI cannot be created
   * @see #getDefaultProtocolCharset
   */
  public URI(String scheme, String userinfo, String host, int port,
          String path, String query, String fragment) throws URIException {
      this(scheme, buildAuthority(userinfo, host, port), path, query,
              fragment);
  }

  /**
   * Joins userinfo, host and port into "[userinfo@]host[:port]".
   *
   * @param userinfo the userinfo string, may be <code>null</code>
   * @param host the host string, may be <code>null</code>
   * @param port the port number, <code>-1</code> meaning no port
   * @return the authority string, or <code>null</code> if there is no host
   */
  private static String buildAuthority(String userinfo, String host,
          int port) {
      if (host == null) {
          return null;
      }
      StringBuffer authority = new StringBuffer();
      if (userinfo != null) {
          authority.append(userinfo).append('@');
      }
      authority.append(host);
      if (port != -1) {
          authority.append(':').append(port);
      }
      return authority.toString();
  }
  /**
   * Constructs a general URI from the given components, without a query.
   * The host is passed on as the authority component.
   *
   * @param scheme the scheme string
   * @param host the host string
   * @param path the path string
   * @param fragment the fragment string
   * @throws URIException if the new URI cannot be created
   * @see #getDefaultProtocolCharset
   */
  public URI(String scheme, String host, String path, String fragment)
          throws URIException {
      // the query is absent
      this(scheme, host, path, null, fragment);
  }
  /**
   * Constructs a general URI by resolving the given relative URI string,
   * taken as unescaped, against the base URI.
   *
   * @param base the base URI
   * @param relative the relative URI string
   * @throws URIException if the new URI cannot be created
   *
   * @deprecated Use {@link #URI(URI, String, boolean)}
   */
  public URI(URI base, String relative) throws URIException {
      // the string is parsed first, then resolved against the base
      this(base, new URI(relative));
  }
  /**
   * Constructs a general URI by resolving the given relative URI string
   * against the base URI.
   *
   * @param base the base URI
   * @param relative the relative URI string
   * @param escaped <tt>true</tt> if <code>relative</code> is already in
   *        escaped form, <tt>false</tt> otherwise
   *
   * @throws URIException if the new URI cannot be created
   *
   * @since 3.0
   */
  public URI(URI base, String relative, boolean escaped) throws URIException {
      // the string is parsed first, then resolved against the base
      this(base, new URI(relative, escaped));
  }
  /**
   * Constructs a general URI by resolving the given relative URI against
   * the given base URI.
   * <p><blockquote><pre>
   *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
   *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
   * </pre></blockquote><p>
   * Resolving Relative References to Absolute Form.
   *
   * <strong>Examples of Resolving Relative URI References</strong>
   *
   * Within an object with a well-defined base URI of
   * <p><blockquote><pre>
   *   http://a/b/c/d;p?q
   * </pre></blockquote><p>
   * the relative URI would be resolved as follows:
   *
   * Normal Examples
   *
   * <p><blockquote><pre>
   *   g:h           =  g:h
   *   g             =  http://a/b/c/g
   *   ./g           =  http://a/b/c/g
   *   g/            =  http://a/b/c/g/
   *   /g            =  http://a/g
   *   //g           =  http://g
   *   ?y            =  http://a/b/c/?y
   *   g?y           =  http://a/b/c/g?y
   *   #s            =  (current document)#s
   *   g#s           =  http://a/b/c/g#s
   *   g?y#s         =  http://a/b/c/g?y#s
   *   ;x            =  http://a/b/c/;x
   *   g;x           =  http://a/b/c/g;x
   *   g;x?y#s       =  http://a/b/c/g;x?y#s
   *   .             =  http://a/b/c/
   *   ./            =  http://a/b/c/
   *   ..            =  http://a/b/
   *   ../           =  http://a/b/
   *   ../g          =  http://a/b/g
   *   ../..         =  http://a/
   *   ../../        =  http://a/
   *   ../../g       =  http://a/g
   * </pre></blockquote><p>
   *
   * Some URI schemes do not allow a hierarchical syntax matching the
   * &lt;hier_part&gt; syntax, and thus cannot use relative references.
   *
   * @param base the base URI; it must have a scheme
   * @param relative the relative URI
   * @throws URIException if the base URI has no scheme or the new URI
   *         cannot be created
   */
  public URI(URI base, URI relative) throws URIException {
      if (base._scheme == null) {
          throw new URIException(URIException.PARSING, "base URI required");
      }
      // the base has a scheme at this point, so start from its scheme and
      // authority
      this._scheme = base._scheme;
      this._authority = base._authority;

      if (base._is_opaque_part || relative._is_opaque_part) {
          // An opaque reference that names its own scheme is documented
          // above as resolving to itself (g:h = g:h), so it keeps that
          // scheme instead of the scheme of the base.
          if (relative._is_opaque_part && relative._scheme != null) {
              this._scheme = relative._scheme;
          }
          this._is_opaque_part = true;
          this._opaque = relative._opaque;
          this._fragment = relative._fragment;
          this.setURI();
          return;
      }

      if (relative._scheme != null) {
          // the reference has its own scheme: take its authority and path
          this._scheme = relative._scheme;
          this._is_net_path = relative._is_net_path;
          this._authority = relative._authority;
          if (relative._is_server) {
              this._is_server = relative._is_server;
              this._userinfo = relative._userinfo;
              this._host = relative._host;
              this._port = relative._port;
          } else if (relative._is_reg_name) {
              this._is_reg_name = relative._is_reg_name;
          }
          this._is_abs_path = relative._is_abs_path;
          this._is_rel_path = relative._is_rel_path;
          this._path = relative._path;
      } else if (base._authority != null) {
          // no scheme in the reference: inherit the authority of the base
          this._is_net_path = base._is_net_path;
          this._authority = base._authority;
          if (base._is_server) {
              this._is_server = base._is_server;
              this._userinfo = base._userinfo;
              this._host = base._host;
              this._port = base._port;
          } else if (base._is_reg_name) {
              this._is_reg_name = base._is_reg_name;
          }
      }

      if (relative._authority != null) {
          // an authority in the reference overrides whatever was inherited
          this._is_net_path = relative._is_net_path;
          this._authority = relative._authority;
          if (relative._is_server) {
              this._is_server = relative._is_server;
              this._userinfo = relative._userinfo;
              this._host = relative._host;
              this._port = relative._port;
          } else if (relative._is_reg_name) {
              this._is_reg_name = relative._is_reg_name;
          }
          this._is_abs_path = relative._is_abs_path;
          this._is_rel_path = relative._is_rel_path;
          this._path = relative._path;
      }

      // resolve the path and query if necessary
      if (relative._scheme == null && relative._authority == null) {
          if ((relative._path == null || relative._path.length == 0)
                  && relative._query == null) {
              // handle a reference to the current document, see RFC 2396
              // section 5.2 step 2
              this._path = base._path;
              this._query = base._query;
          } else {
              this._path = resolvePath(base._path, relative._path);
          }
      }
      // a query in the reference replaces the query of the base
      if (relative._query != null) {
          this._query = relative._query;
      }
      // the fragment of the base is never inherited
      if (relative._fragment != null) {
          this._fragment = relative._fragment;
      }
      this.setURI();
      // reparse the newly built URI, this will ensure that all flags are
      // set correctly.
      // TODO there must be a better way to do this
      parseUriReference(new String(_uri), true);
  }
  548. // --------------------------------------------------- Instance Variables
  549. /** Version ID for serialization */
  550. static final long serialVersionUID = 604752400577948726L;
  551. /**
  552. * Cache the hash code for this URI.
  553. */
  554. protected int hash = 0;
  555. /**
  556. * This Uniform Resource Identifier (URI).
  557. * The URI is always in an "escaped" form, since escaping or unescaping
  558. * a completed URI might change its semantics.
  559. */
  560. protected char[] _uri = null;
  561. /**
  562. * The charset of the protocol used by this URI instance.
  563. */
  564. protected String protocolCharset = null;
  565. /**
  566. * The default charset of the protocol. RFC 2277, 2396
  567. */
  568. protected static String defaultProtocolCharset = "UTF-8";
  569. /**
  570. * The default charset of the document. RFC 2277, 2396
  571. * The platform's charset is used for the document by default.
  572. */
  573. protected static String defaultDocumentCharset = null;
  574. protected static String defaultDocumentCharsetByLocale = null;
  575. protected static String defaultDocumentCharsetByPlatform = null;
  576. // Static initializer for defaultDocumentCharset
  577. static {
  578. Locale locale = Locale.getDefault();
  579. // in order to support backward compatiblity
  580. if (locale != null) {
  581. defaultDocumentCharsetByLocale =
  582. LocaleToCharsetMap.getCharset(locale);
  583. // set the default document charset
  584. defaultDocumentCharset = defaultDocumentCharsetByLocale;
  585. }
  586. // in order to support platform encoding
  587. try {
  588. defaultDocumentCharsetByPlatform = System.getProperty("file.encoding");
  589. } catch (SecurityException ignore) {
  590. }
  591. if (defaultDocumentCharset == null) {
  592. // set the default document charset
  593. defaultDocumentCharset = defaultDocumentCharsetByPlatform;
  594. }
  595. }
  596. /**
  597. * The scheme.
  598. */
  599. protected char[] _scheme = null;
  600. /**
  601. * The opaque.
  602. */
  603. protected char[] _opaque = null;
  604. /**
  605. * The authority.
  606. */
  607. protected char[] _authority = null;
  608. /**
  609. * The userinfo.
  610. */
  611. protected char[] _userinfo = null;
  612. /**
  613. * The host.
  614. */
  615. protected char[] _host = null;
  616. /**
  617. * The port.
  618. */
  619. protected int _port = -1;
  620. /**
  621. * The path.
  622. */
  623. protected char[] _path = null;
  624. /**
  625. * The query.
  626. */
  627. protected char[] _query = null;
  628. /**
  629. * The fragment.
  630. */
  631. protected char[] _fragment = null;
  632. /**
  633. * The root path.
  634. */
  635. protected static char[] rootPath = { '/' };
  636. // ---------------------- Generous characters for each component validation
  /**
   * BitSet holding only the percent "%" character. It always has the
   * reserved purpose of being the escape indicator, so it must be escaped
   * as "%25" in order to be used as data within a URI.
   */
  protected static final BitSet percent = new BitSet(256);

  // Static initializer for percent
  static {
      final char escapeIndicator = '%';
      percent.set(escapeIndicator);
  }
  /**
   * BitSet for digit.
   * <p><blockquote><pre>
   * digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
   *            "8" | "9"
   * </pre></blockquote><p>
   */
  protected static final BitSet digit = new BitSet(256);

  // Static initializer for digit
  static {
      for (char c = '0'; c <= '9'; c++) {
          digit.set(c);
      }
  }
  /**
   * BitSet for alpha: the lower-case and upper-case ASCII letters.
   * <p><blockquote><pre>
   * alpha         = lowalpha | upalpha
   * </pre></blockquote><p>
   */
  protected static final BitSet alpha = new BitSet(256);

  // Static initializer for alpha
  static {
      // both cases are filled in a single pass over the 26 letters
      for (int offset = 0; offset < 26; offset++) {
          alpha.set('a' + offset);
          alpha.set('A' + offset);
      }
  }
  /**
   * BitSet for alphanum, the union of alpha and digit.
   * <p><blockquote><pre>
   *  alphanum      = alpha | digit
   * </pre></blockquote><p>
   */
  protected static final BitSet alphanum = new BitSet(256);

  // Static initializer for alphanum
  static {
      alphanum.or(digit);
      alphanum.or(alpha);
  }
  /**
   * BitSet for hex: the digits plus the letters a-f in both cases.
   * <p><blockquote><pre>
   * hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
   *                         "a" | "b" | "c" | "d" | "e" | "f"
   * </pre></blockquote><p>
   */
  protected static final BitSet hex = new BitSet(256);

  // Static initializer for hex
  static {
      hex.or(digit);
      // the six hexadecimal letters, lower and upper case
      for (int offset = 0; offset < 6; offset++) {
          hex.set('a' + offset);
          hex.set('A' + offset);
      }
  }
  /**
   * BitSet for escaped: every character that may occur inside an escape
   * sequence, i.e. the percent sign and the hex characters.
   * <p><blockquote><pre>
   * escaped       = "%" hex hex
   * </pre></blockquote><p>
   */
  protected static final BitSet escaped = new BitSet(256);

  // Static initializer for escaped
  static {
      escaped.or(hex);
      escaped.or(percent);
  }
  /**
   * BitSet for mark.
   * <p><blockquote><pre>
   * mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
   *                 "(" | ")"
   * </pre></blockquote><p>
   */
  protected static final BitSet mark = new BitSet(256);

  // Static initializer for mark
  static {
      final String marks = "-_.!~*'()";
      for (int i = 0; i < marks.length(); i++) {
          mark.set(marks.charAt(i));
      }
  }
  /**
   * BitSet for unreserved: data characters that are allowed in a URI but
   * do not have a reserved purpose.
   * <p><blockquote><pre>
   * unreserved    = alphanum | mark
   * </pre></blockquote><p>
   */
  protected static final BitSet unreserved = new BitSet(256);

  // Static initializer for unreserved
  static {
      unreserved.or(mark);
      unreserved.or(alphanum);
  }
  /**
   * BitSet for reserved.
   * <p><blockquote><pre>
   * reserved      = ";" | "/" | "?" | ":" | "@" | "&amp;" | "=" | "+" |
   *                 "$" | ","
   * </pre></blockquote><p>
   */
  protected static final BitSet reserved = new BitSet(256);

  // Static initializer for reserved
  static {
      final String reservedChars = ";/?:@&=+$,";
      for (int i = 0; i < reservedChars.length(); i++) {
          reserved.set(reservedChars.charAt(i));
      }
  }
  /**
   * BitSet for uric, the union of reserved, unreserved and escaped.
   * <p><blockquote><pre>
   * uric          = reserved | unreserved | escaped
   * </pre></blockquote><p>
   */
  protected static final BitSet uric = new BitSet(256);

  // Static initializer for uric
  static {
      uric.or(escaped);
      uric.or(unreserved);
      uric.or(reserved);
  }
  786. /**
  787. * BitSet for fragment (alias for uric).
  788. * <p><blockquote><pre>
  789. * fragment = *uric
  790. * </pre></blockquote><p>
  791. */
  792. protected static final BitSet fragment = uric;
  793. /**
  794. * BitSet for query (alias for uric).
  795. * <p><blockquote><pre>
  796. * query = *uric
  797. * </pre></blockquote><p>
  798. */
  799. protected static final BitSet query = uric;
  /**
   * BitSet for pchar.
   * <p><blockquote><pre>
   * pchar         = unreserved | escaped |
   *                 ":" | "@" | "&amp;" | "=" | "+" | "$" | ","
   * </pre></blockquote><p>
   */
  protected static final BitSet pchar = new BitSet(256);

  // Static initializer for pchar
  static {
      pchar.or(unreserved);
      pchar.or(escaped);
      // the punctuation that pchar allows in addition
      final String extra = ":@&=+$,";
      for (int i = 0; i < extra.length(); i++) {
          pchar.set(extra.charAt(i));
      }
  }
  820. /**
  821. * BitSet for param (alias for pchar).
  822. * <p><blockquote><pre>
  823. * param = *pchar
  824. * </pre></blockquote><p>
  825. */
  826. protected static final BitSet param = pchar;
  /**
   * BitSet for segment: the pchar and param characters plus the ";" that
   * separates them.
   * <p><blockquote><pre>
   * segment       = *pchar *( ";" param )
   * </pre></blockquote><p>
   */
  protected static final BitSet segment = new BitSet(256);

  // Static initializer for segment
  static {
      segment.set(';');
      segment.or(pchar);
      segment.or(param);
  }
  /**
   * BitSet for path segments: the segment characters plus the "/" that
   * separates segments.
   * <p><blockquote><pre>
   * path_segments = segment *( "/" segment )
   * </pre></blockquote><p>
   */
  protected static final BitSet path_segments = new BitSet(256);

  // Static initializer for path_segments
  static {
      path_segments.or(segment);
      path_segments.set('/');
  }
  /**
   * BitSet for the URI absolute path: the leading "/" plus the
   * path_segments characters.
   * <p><blockquote><pre>
   * abs_path      = "/"  path_segments
   * </pre></blockquote><p>
   */
  protected static final BitSet abs_path = new BitSet(256);

  // Static initializer for abs_path
  static {
      abs_path.or(path_segments);
      abs_path.set('/');
  }
  /**
   * URI bitset for encoding typical non-slash characters.
   * <p><blockquote><pre>
   * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
   *                 "&amp;" | "=" | "+" | "$" | ","
   * </pre></blockquote><p>
   */
  protected static final BitSet uric_no_slash = new BitSet(256);

  // Static initializer for uric_no_slash
  static {
      uric_no_slash.or(unreserved);
      uric_no_slash.or(escaped);
      uric_no_slash.set(';');
      uric_no_slash.set('?');
      // ":" belongs to the grammar above; ";" used to be set a second
      // time here instead, which left ":" out of the set
      uric_no_slash.set(':');
      uric_no_slash.set('@');
      uric_no_slash.set('&');
      uric_no_slash.set('=');
      uric_no_slash.set('+');
      uric_no_slash.set('$');
      uric_no_slash.set(',');
  }
  /**
   * URI bitset that combines uric_no_slash and uric.
   * <p><blockquote><pre>
   * opaque_part   = uric_no_slash *uric
   * </pre></blockquote><p>
   */
  protected static final BitSet opaque_part = new BitSet(256);

  // Static initializer for opaque_part
  static {
      // The set is generous: the grammar forbids a slash as the first
      // character, but a plain character set cannot express position.
      opaque_part.or(uric);
      opaque_part.or(uric_no_slash);
  }
  /**
   * URI bitset that combines absolute path and opaque part.
   * <p><blockquote><pre>
   * path          = [ abs_path | opaque_part ]
   * </pre></blockquote><p>
   */
  protected static final BitSet path = new BitSet(256);

  // Static initializer for path
  static {
      path.or(opaque_part);
      path.or(abs_path);
  }
/**
 * Port, a logical alias for digit. This is the very same BitSet object as
 * <code>digit</code>, not a copy, so a change to one is seen through the
 * other.
 */
protected static final BitSet port = digit;
  915. /**
  916. * Bitset that combines digit and dot fo IPv$address.
  917. * <p><blockquote><pre>
  918. * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit
  919. * </pre></blockquote><p>
  920. */
  921. protected static final BitSet IPv4address = new BitSet(256);
  922. // Static initializer for IPv4address
  923. static {
  924. IPv4address.or(digit);
  925. IPv4address.set('.');
  926. }
  927. /**
  928. * RFC 2373.
  929. * <p><blockquote><pre>
  930. * IPv6address = hexpart [ ":" IPv4address ]
  931. * </pre></blockquote><p>
  932. */
  933. protected static final BitSet IPv6address = new BitSet(256);
  934. // Static initializer for IPv6address reference
  935. static {
  936. IPv6address.or(hex); // hexpart
  937. IPv6address.set(':');
  938. IPv6address.or(IPv4address);
  939. }
  940. /**
  941. * RFC 2732, 2373.
  942. * <p><blockquote><pre>
  943. * IPv6reference = "[" IPv6address "]"
  944. * </pre></blockquote><p>
  945. */
  946. protected static final BitSet IPv6reference = new BitSet(256);
  947. // Static initializer for IPv6reference
  948. static {
  949. IPv6reference.set('[');
  950. IPv6reference.or(IPv6address);
  951. IPv6reference.set(']');
  952. }
  953. /**
  954. * BitSet for toplabel.
  955. * <p><blockquote><pre>
  956. * toplabel = alpha | alpha *( alphanum | "-" ) alphanum
  957. * </pre></blockquote><p>
  958. */
  959. protected static final BitSet toplabel = new BitSet(256);
  960. // Static initializer for toplabel
  961. static {
  962. toplabel.or(alphanum);
  963. toplabel.set('-');
  964. }
/**
 * BitSet for domainlabel. This is the very same BitSet object as
 * <code>toplabel</code>, not a copy.
 * <p><blockquote><pre>
 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
 * </pre></blockquote><p>
 */
protected static final BitSet domainlabel = toplabel;
  972. /**
  973. * BitSet for hostname.
  974. * <p><blockquote><pre>
  975. * hostname = *( domainlabel "." ) toplabel [ "." ]
  976. * </pre></blockquote><p>
  977. */
  978. protected static final BitSet hostname = new BitSet(256);
  979. // Static initializer for hostname
  980. static {
  981. hostname.or(toplabel);
  982. // hostname.or(domainlabel);
  983. hostname.set('.');
  984. }
  985. /**
  986. * BitSet for host.
  987. * <p><blockquote><pre>
  988. * host = hostname | IPv4address | IPv6reference
  989. * </pre></blockquote><p>
  990. */
  991. protected static final BitSet host = new BitSet(256);
  992. // Static initializer for host
  993. static {
  994. host.or(hostname);
  995. // host.or(IPv4address);
  996. host.or(IPv6reference); // IPv4address
  997. }
  998. /**
  999. * BitSet for hostport.
  1000. * <p><blockquote><pre>
  1001. * hostport = host [ ":" port ]
  1002. * </pre></blockquote><p>
  1003. */
  1004. protected static final BitSet hostport = new BitSet(256);
  1005. // Static initializer for hostport
  1006. static {
  1007. hostport.or(host);
  1008. hostport.set(':');
  1009. hostport.or(port);
  1010. }
  1011. /**
  1012. * Bitset for userinfo.
  1013. * <p><blockquote><pre>
  1014. * userinfo = *( unreserved | escaped |
  1015. * ";" | ":" | "&" | "=" | "+" | "$" | "," )
  1016. * </pre></blockquote><p>
  1017. */
  1018. protected static final BitSet userinfo = new BitSet(256);
  1019. // Static initializer for userinfo
  1020. static {
  1021. userinfo.or(unreserved);
  1022. userinfo.or(escaped);
  1023. userinfo.set(';');
  1024. userinfo.set(':');
  1025. userinfo.set('&');
  1026. userinfo.set('=');
  1027. userinfo.set('+');
  1028. userinfo.set('$');
  1029. userinfo.set(',');
  1030. }
  1031. /**
  1032. * BitSet for within the userinfo component like user and password.
  1033. */
  1034. public static final BitSet within_userinfo = new BitSet(256);
  1035. // Static initializer for within_userinfo
  1036. static {
  1037. within_userinfo.or(userinfo);
  1038. within_userinfo.clear(';'); // reserved within authority
  1039. within_userinfo.clear(':');
  1040. within_userinfo.clear('@');
  1041. within_userinfo.clear('?');
  1042. within_userinfo.clear('/');
  1043. }
  1044. /**
  1045. * Bitset for server.
  1046. * <p><blockquote><pre>
  1047. * server = [ [ userinfo "@" ] hostport ]
  1048. * </pre></blockquote><p>
  1049. */
  1050. protected static final BitSet server = new BitSet(256);
  1051. // Static initializer for server
  1052. static {
  1053. server.or(userinfo);
  1054. server.set('@');
  1055. server.or(hostport);
  1056. }
  1057. /**
  1058. * BitSet for reg_name.
  1059. * <p><blockquote><pre>
  1060. * reg_name = 1*( unreserved | escaped | "$" | "," |
  1061. * ";" | ":" | "@" | "&" | "=" | "+" )
  1062. * </pre></blockquote><p>
  1063. */
  1064. protected static final BitSet reg_name = new BitSet(256);
  1065. // Static initializer for reg_name
  1066. static {
  1067. reg_name.or(unreserved);
  1068. reg_name.or(escaped);
  1069. reg_name.set('$');
  1070. reg_name.set(',');
  1071. reg_name.set(';');
  1072. reg_name.set(':');
  1073. reg_name.set('@');
  1074. reg_name.set('&');
  1075. reg_name.set('=');
  1076. reg_name.set('+');
  1077. }
  1078. /**
  1079. * BitSet for authority.
  1080. * <p><blockquote><pre>
  1081. * authority = server | reg_name
  1082. * </pre></blockquote><p>
  1083. */
  1084. protected static final BitSet authority = new BitSet(256);
  1085. // Static initializer for authority
  1086. static {
  1087. authority.or(server);
  1088. authority.or(reg_name);
  1089. }
  1090. /**
  1091. * BitSet for scheme.
  1092. * <p><blockquote><pre>
  1093. * scheme = alpha *( alpha | digit | "+" | "-" | "." )
  1094. * </pre></blockquote><p>
  1095. */
  1096. protected static final BitSet scheme = new BitSet(256);
  1097. // Static initializer for scheme
  1098. static {
  1099. scheme.or(alpha);
  1100. scheme.or(digit);
  1101. scheme.set('+');
  1102. scheme.set('-');
  1103. scheme.set('.');
  1104. }
  1105. /**
  1106. * BitSet for rel_segment.
  1107. * <p><blockquote><pre>
  1108. * rel_segment = 1*( unreserved | escaped |
  1109. * ";" | "@" | "&" | "=" | "+" | "$" | "," )
  1110. * </pre></blockquote><p>
  1111. */
  1112. protected static final BitSet rel_segment = new BitSet(256);
  1113. // Static initializer for rel_segment
  1114. static {
  1115. rel_segment.or(unreserved);
  1116. rel_segment.or(escaped);
  1117. rel_segment.set(';');
  1118. rel_segment.set('@');
  1119. rel_segment.set('&');
  1120. rel_segment.set('=');
  1121. rel_segment.set('+');
  1122. rel_segment.set('$');
  1123. rel_segment.set(',');
  1124. }
  1125. /**
  1126. * BitSet for rel_path.
  1127. * <p><blockquote><pre>
  1128. * rel_path = rel_segment [ abs_path ]
  1129. * </pre></blockquote><p>
  1130. */
  1131. protected static final BitSet rel_path = new BitSet(256);
  1132. // Static initializer for rel_path
  1133. static {
  1134. rel_path.or(rel_segment);
  1135. rel_path.or(abs_path);
  1136. }
  1137. /**
  1138. * BitSet for net_path.
  1139. * <p><blockquote><pre>
  1140. * net_path = "//" authority [ abs_path ]
  1141. * </pre></blockquote><p>
  1142. */
  1143. protected static final BitSet net_path = new BitSet(256);
  1144. // Static initializer for net_path
  1145. static {
  1146. net_path.set('/');
  1147. net_path.or(authority);
  1148. net_path.or(abs_path);
  1149. }
  1150. /**
  1151. * BitSet for hier_part.
  1152. * <p><blockquote><pre>
  1153. * hier_part = ( net_path | abs_path ) [ "?" query ]
  1154. * </pre></blockquote><p>
  1155. */
  1156. protected static final BitSet hier_part = new BitSet(256);
  1157. // Static initializer for hier_part
  1158. static {
  1159. hier_part.or(net_path);
  1160. hier_part.or(abs_path);
  1161. // hier_part.set('?'); aleady included
  1162. hier_part.or(query);
  1163. }
  1164. /**
  1165. * BitSet for relativeURI.
  1166. * <p><blockquote><pre>
  1167. * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
  1168. * </pre></blockquote><p>
  1169. */
  1170. protected static final BitSet relativeURI = new BitSet(256);
  1171. // Static initializer for relativeURI
  1172. static {
  1173. relativeURI.or(net_path);
  1174. relativeURI.or(abs_path);
  1175. relativeURI.or(rel_path);
  1176. // relativeURI.set('?'); aleady included
  1177. relativeURI.or(query);
  1178. }
  1179. /**
  1180. * BitSet for absoluteURI.
  1181. * <p><blockquote><pre>
  1182. * absoluteURI = scheme ":" ( hier_part | opaque_part )
  1183. * </pre></blockquote><p>
  1184. */
  1185. protected static final BitSet absoluteURI = new BitSet(256);
  1186. // Static initializer for absoluteURI
  1187. static {
  1188. absoluteURI.or(scheme);
  1189. absoluteURI.set(':');
  1190. absoluteURI.or(hier_part);
  1191. absoluteURI.or(opaque_part);
  1192. }
  1193. /**
  1194. * BitSet for URI-reference.
  1195. * <p><blockquote><pre>
  1196. * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
  1197. * </pre></blockquote><p>
  1198. */
  1199. protected static final BitSet URI_reference = new BitSet(256);
  1200. // Static initializer for URI_reference
  1201. static {
  1202. URI_reference.or(absoluteURI);
  1203. URI_reference.or(relativeURI);
  1204. URI_reference.set('#');
  1205. URI_reference.or(fragment);
  1206. }
  1207. // ---------------------------- Characters disallowed within the URI syntax
  1208. // Excluded US-ASCII Characters are like control, space, delims and unwise
  1209. /**
  1210. * BitSet for control.
  1211. */
  1212. public static final BitSet control = new BitSet(256);
  1213. // Static initializer for control
  1214. static {
  1215. for (int i = 0; i <= 0x1F; i++) {
  1216. control.set(i);
  1217. }
  1218. control.set(0x7F);
  1219. }
  1220. /**
  1221. * BitSet for space.
  1222. */
  1223. public static final BitSet space = new BitSet(256);
  1224. // Static initializer for space
  1225. static {
  1226. space.set(0x20);
  1227. }
  1228. /**
  1229. * BitSet for delims.
  1230. */
  1231. public static final BitSet delims = new BitSet(256);
  1232. // Static initializer for delims
  1233. static {
  1234. delims.set('<');
  1235. delims.set('>');
  1236. delims.set('#');
  1237. delims.set('%');
  1238. delims.set('"');
  1239. }
  1240. /**
  1241. * BitSet for unwise.
  1242. */
  1243. public static final BitSet unwise = new BitSet(256);
  1244. // Static initializer for unwise
  1245. static {
  1246. unwise.set('{');
  1247. unwise.set('}');
  1248. unwise.set('|');
  1249. unwise.set('\\');
  1250. unwise.set('^');
  1251. unwise.set('[');
  1252. unwise.set(']');
  1253. unwise.set('`');
  1254. }
  1255. /**
  1256. * Disallowed rel_path before escaping.
  1257. */
  1258. public static final BitSet disallowed_rel_path = new BitSet(256);
  1259. // Static initializer for disallowed_rel_path
  1260. static {
  1261. disallowed_rel_path.or(uric);
  1262. disallowed_rel_path.andNot(rel_path);
  1263. }
  1264. /**
  1265. * Disallowed opaque_part before escaping.
  1266. */
  1267. public static final BitSet disallowed_opaque_part = new BitSet(256);
  1268. // Static initializer for disallowed_opaque_part
  1269. static {
  1270. disallowed_opaque_part.or(uric);
  1271. disallowed_opaque_part.andNot(opaque_part);
  1272. }
  1273. // ----------------------- Characters allowed within and for each component
  1274. /**
  1275. * Those characters that are allowed for the authority component.
  1276. */
  1277. public static final BitSet allowed_authority = new BitSet(256);
  1278. // Static initializer for allowed_authority
  1279. static {
  1280. allowed_authority.or(authority);
  1281. allowed_authority.clear('%');
  1282. }
  1283. /**
  1284. * Those characters that are allowed for the opaque_part.
  1285. */
  1286. public static final BitSet allowed_opaque_part = new BitSet(256);
  1287. // Static initializer for allowed_opaque_part
  1288. static {
  1289. allowed_opaque_part.or(opaque_part);
  1290. allowed_opaque_part.clear('%');
  1291. }
  1292. /**
  1293. * Those characters that are allowed for the reg_name.
  1294. */
  1295. public static final BitSet allowed_reg_name = new BitSet(256);
  1296. // Static initializer for allowed_reg_name
  1297. static {
  1298. allowed_reg_name.or(reg_name);
  1299. // allowed_reg_name.andNot(percent);
  1300. allowed_reg_name.clear('%');
  1301. }
  1302. /**
  1303. * Those characters that are allowed for the userinfo component.
  1304. */
  1305. public static final BitSet allowed_userinfo = new BitSet(256);
  1306. // Static initializer for allowed_userinfo
  1307. static {
  1308. allowed_userinfo.or(userinfo);
  1309. // allowed_userinfo.andNot(percent);
  1310. allowed_userinfo.clear('%');
  1311. }
  1312. /**
  1313. * Those characters that are allowed for within the userinfo component.
  1314. */
  1315. public static final BitSet allowed_within_userinfo = new BitSet(256);
  1316. // Static initializer for allowed_within_userinfo
  1317. static {
  1318. allowed_within_userinfo.or(within_userinfo);
  1319. allowed_within_userinfo.clear('%');
  1320. }
  1321. /**
  1322. * Those characters that are allowed for the IPv6reference component.
  1323. * The characters '[', ']' in IPv6reference should be excluded.
  1324. */
  1325. public static final BitSet allowed_IPv6reference = new BitSet(256);
  1326. // Static initializer for allowed_IPv6reference
  1327. static {
  1328. allowed_IPv6reference.or(IPv6reference);
  1329. // allowed_IPv6reference.andNot(unwise);
  1330. allowed_IPv6reference.clear('[');
  1331. allowed_IPv6reference.clear(']');
  1332. }
  1333. /**
  1334. * Those characters that are allowed for the host component.
  1335. * The characters '[', ']' in IPv6reference should be excluded.
  1336. */
  1337. public static final BitSet allowed_host = new BitSet(256);
  1338. // Static initializer for allowed_host
  1339. static {
  1340. allowed_host.or(hostname);
  1341. allowed_host.or(allowed_IPv6reference);
  1342. }
  1343. /**
  1344. * Those characters that are allowed for the authority component.
  1345. */
  1346. public static final BitSet allowed_within_authority = new BitSet(256);
  1347. // Static initializer for allowed_within_authority
  1348. static {
  1349. allowed_within_authority.or(server);
  1350. allowed_within_authority.or(reg_name);
  1351. allowed_within_authority.clear(';');
  1352. allowed_within_authority.clear(':');
  1353. allowed_within_authority.clear('@');
  1354. allowed_within_authority.clear('?');
  1355. allowed_within_authority.clear('/');
  1356. }
  1357. /**
  1358. * Those characters that are allowed for the abs_path.
  1359. */
  1360. public static final BitSet allowed_abs_path = new BitSet(256);
  1361. // Static initializer for allowed_abs_path
  1362. static {
  1363. allowed_abs_path.or(abs_path);
  1364. // allowed_abs_path.set('/'); // aleady included
  1365. allowed_abs_path.andNot(percent);
  1366. }
  1367. /**
  1368. * Those characters that are allowed for the rel_path.
  1369. */
  1370. public static final BitSet allowed_rel_path = new BitSet(256);
  1371. // Static initializer for allowed_rel_path
  1372. static {
  1373. allowed_rel_path.or(rel_path);
  1374. allowed_rel_path.clear('%');
  1375. }
  1376. /**
  1377. * Those characters that are allowed within the path.
  1378. */
  1379. public static final BitSet allowed_within_path = new BitSet(256);
  1380. // Static initializer for allowed_within_path
  1381. static {
  1382. allowed_within_path.or(abs_path);
  1383. allowed_within_path.clear('/');
  1384. allowed_within_path.clear(';');
  1385. allowed_within_path.clear('=');
  1386. allowed_within_path.clear('?');
  1387. }
  1388. /**
  1389. * Those characters that are allowed for the query component.
  1390. */
  1391. public static final BitSet allowed_query = new BitSet(256);
  1392. // Static initializer for allowed_query
  1393. static {
  1394. allowed_query.or(uric);
  1395. allowed_query.clear('%');
  1396. }
  1397. /**
  1398. * Those characters that are allowed within the query component.
  1399. */
  1400. public static final BitSet allowed_within_query = new BitSet(256);
  1401. // Static initializer for allowed_within_query
  1402. static {
  1403. allowed_within_query.or(allowed_query);
  1404. allowed_within_query.andNot(reserved); // excluded 'reserved'
  1405. }
  1406. /**
  1407. * Those characters that are allowed for the fragment component.
  1408. */
  1409. public static final BitSet allowed_fragment = new BitSet(256);
  1410. // Static initializer for allowed_fragment
  1411. static {
  1412. allowed_fragment.or(uric);
  1413. allowed_fragment.clear('%');
  1414. }
  1415. // ------------------------------------------- Flags for this URI-reference
// TODO: Figure out what all these variables are for and provide javadoc
// The notes below describe only what parseUriReference visibly does with
// each flag; any other writers are outside this part of the file.
// URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
// absoluteURI = scheme ":" ( hier_part | opaque_part )
// Set by parseUriReference when the text after the optional scheme starts
// with '/'.
protected boolean _is_hier_part;
// Set by parseUriReference when a non-absolute path fails the rel_path
// check but passes the opaque_part check.
protected boolean _is_opaque_part;
// relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
// hier_part = ( net_path | abs_path ) [ "?" query ]
// Set by parseUriReference after a "//" authority has been parsed.
protected boolean _is_net_path;
// Set by parseUriReference when the remaining text starts with '/'.
protected boolean _is_abs_path;
// Set by parseUriReference when a non-absolute path passes the rel_path
// check.
protected boolean _is_rel_path;
// net_path = "//" authority [ abs_path ]
// authority = server | reg_name
// NOTE(review): the flags below are not assigned in parseUriReference;
// presumably parseAuthority sets them - confirm against that method.
protected boolean _is_reg_name;
protected boolean _is_server; // = _has_server
// server = [ [ userinfo "@" ] hostport ]
// host = hostname | IPv4address | IPv6reference
protected boolean _is_hostname;
protected boolean _is_IPv4address;
protected boolean _is_IPv6reference;
  1435. // ------------------------------------------ Character and escape encoding
  1436. /**
  1437. * Encodes URI string.
  1438. *
  1439. * This is a two mapping, one from original characters to octets, and
  1440. * subsequently a second from octets to URI characters:
  1441. * <p><blockquote><pre>
  1442. * original character sequence->octet sequence->URI character sequence
  1443. * </pre></blockquote><p>
  1444. *
  1445. * An escaped octet is encoded as a character triplet, consisting of the
  1446. * percent character "%" followed by the two hexadecimal digits
  1447. * representing the octet code. For example, "%20" is the escaped
  1448. * encoding for the US-ASCII space character.
  1449. * <p>
  1450. * Conversion from the local filesystem character set to UTF-8 will
  1451. * normally involve a two step process. First convert the local character
  1452. * set to the UCS; then convert the UCS to UTF-8.
  1453. * The first step in the process can be performed by maintaining a mapping
  1454. * table that includes the local character set code and the corresponding
  1455. * UCS code.
  1456. * The next step is to convert the UCS character code to the UTF-8 encoding.
  1457. * <p>
  1458. * Mapping between vendor codepages can be done in a very similar manner
  1459. * as described above.
  1460. * <p>
  1461. * The only time escape encodings can allowedly be made is when a URI is
  1462. * being created from its component parts. The escape and validate methods
  1463. * are internally performed within this method.
  1464. *
  1465. * @param original the original character sequence
  1466. * @param allowed those characters that are allowed within a component
  1467. * @param charset the protocol charset
  1468. * @return URI character sequence
  1469. * @throws URIException null component or unsupported character encoding
  1470. */
  1471. protected static char[] encode(String original, BitSet allowed,
  1472. String charset) throws URIException {
  1473. if (original == null) {
  1474. throw new IllegalArgumentException("Original string may not be null");
  1475. }
  1476. if (allowed == null) {
  1477. throw new IllegalArgumentException("Allowed bitset may not be null");
  1478. }
  1479. byte[] rawdata = URLCodec.encodeUrl(allowed, EncodingUtil.getBytes(original, charset));
  1480. return EncodingUtil.getAsciiString(rawdata).toCharArray();
  1481. }
  1482. /**
  1483. * Decodes URI encoded string.
  1484. *
  1485. * This is a two mapping, one from URI characters to octets, and
  1486. * subsequently a second from octets to original characters:
  1487. * <p><blockquote><pre>
  1488. * URI character sequence->octet sequence->original character sequence
  1489. * </pre></blockquote><p>
  1490. *
  1491. * A URI must be separated into its components before the escaped
  1492. * characters within those components can be allowedly decoded.
  1493. * <p>
  1494. * Notice that there is a chance that URI characters that are non UTF-8
  1495. * may be parsed as valid UTF-8. A recent non-scientific analysis found
  1496. * that EUC encoded Japanese words had a 2.7% false reading; SJIS had a
  1497. * 0.0005% false reading; other encoding such as ASCII or KOI-8 have a 0%
  1498. * false reading.
  1499. * <p>
  1500. * The percent "%" character always has the reserved purpose of being
  1501. * the escape indicator, it must be escaped as "%25" in order to be used
  1502. * as data within a URI.
  1503. * <p>
  1504. * The unescape method is internally performed within this method.
  1505. *
  1506. * @param component the URI character sequence
  1507. * @param charset the protocol charset
  1508. * @return original character sequence
  1509. * @throws URIException incomplete trailing escape pattern or unsupported
  1510. * character encoding
  1511. */
  1512. protected static String decode(char[] component, String charset)
  1513. throws URIException {
  1514. if (component == null) {
  1515. throw new IllegalArgumentException("Component array of chars may not be null");
  1516. }
  1517. return decode(new String(component), charset);
  1518. }
  1519. /**
  1520. * Decodes URI encoded string.
  1521. *
  1522. * This is a two mapping, one from URI characters to octets, and
  1523. * subsequently a second from octets to original characters:
  1524. * <p><blockquote><pre>
  1525. * URI character sequence->octet sequence->original character sequence
  1526. * </pre></blockquote><p>
  1527. *
  1528. * A URI must be separated into its components before the escaped
  1529. * characters within those components can be allowedly decoded.
  1530. * <p>
  1531. * Notice that there is a chance that URI characters that are non UTF-8
  1532. * may be parsed as valid UTF-8. A recent non-scientific analysis found
  1533. * that EUC encoded Japanese words had a 2.7% false reading; SJIS had a
  1534. * 0.0005% false reading; other encoding such as ASCII or KOI-8 have a 0%
  1535. * false reading.
  1536. * <p>
  1537. * The percent "%" character always has the reserved purpose of being
  1538. * the escape indicator, it must be escaped as "%25" in order to be used
  1539. * as data within a URI.
  1540. * <p>
  1541. * The unescape method is internally performed within this method.
  1542. *
  1543. * @param component the URI character sequence
  1544. * @param charset the protocol charset
  1545. * @return original character sequence
  1546. * @throws URIException incomplete trailing escape pattern or unsupported
  1547. * character encoding
  1548. *
  1549. * @since 3.0
  1550. */
  1551. protected static String decode(String component, String charset)
  1552. throws URIException {
  1553. if (component == null) {
  1554. throw new IllegalArgumentException("Component array of chars may not be null");
  1555. }
  1556. byte[] rawdata = null;
  1557. try {
  1558. rawdata = URLCodec.decodeUrl(EncodingUtil.getAsciiBytes(component));
  1559. } catch (DecoderException e) {
  1560. throw new URIException(e.getMessage());
  1561. }
  1562. return EncodingUtil.getString(rawdata, charset);
  1563. }
  1564. /**
  1565. * Pre-validate the unescaped URI string within a specific component.
  1566. *
  1567. * @param component the component string within the component
  1568. * @param disallowed those characters disallowed within the component
  1569. * @return if true, it doesn't have the disallowed characters
  1570. * if false, the component is undefined or an incorrect one
  1571. */
  1572. protected boolean prevalidate(String component, BitSet disallowed) {
  1573. // prevalidate the given component by disallowed characters
  1574. if (component == null) {
  1575. return false; // undefined
  1576. }
  1577. char[] target = component.toCharArray();
  1578. for (int i = 0; i < target.length; i++) {
  1579. if (disallowed.get(target[i])) {
  1580. return false;
  1581. }
  1582. }
  1583. return true;
  1584. }
  1585. /**
  1586. * Validate the URI characters within a specific component.
  1587. * The component must be performed after escape encoding. Or it doesn't
  1588. * include escaped characters.
  1589. *
  1590. * @param component the characters sequence within the component
  1591. * @param generous those characters that are allowed within a component
  1592. * @return if true, it's the correct URI character sequence
  1593. */
  1594. protected boolean validate(char[] component, BitSet generous) {
  1595. // validate each component by generous characters
  1596. return validate(component, 0, -1, generous);
  1597. }
  1598. /**
  1599. * Validate the URI characters within a specific component.
  1600. * The component must be performed after escape encoding. Or it doesn't
  1601. * include escaped characters.
  1602. * <p>
  1603. * It's not that much strict, generous. The strict validation might be
  1604. * performed before being called this method.
  1605. *
  1606. * @param component the characters sequence within the component
  1607. * @param soffset the starting offset of the given component
  1608. * @param eoffset the ending offset of the given component
  1609. * if -1, it means the length of the component
  1610. * @param generous those characters that are allowed within a component
  1611. * @return if true, it's the correct URI character sequence
  1612. */
  1613. protected boolean validate(char[] component, int soffset, int eoffset,
  1614. BitSet generous) {
  1615. // validate each component by generous characters
  1616. if (eoffset == -1) {
  1617. eoffset = component.length - 1;
  1618. }
  1619. for (int i = soffset; i <= eoffset; i++) {
  1620. if (!generous.get(component[i])) {
  1621. return false;
  1622. }
  1623. }
  1624. return true;
  1625. }
  1626. /**
  1627. * In order to avoid any possilbity of conflict with non-ASCII characters,
  1628. * Parse a URI reference as a <code>String</code> with the character
  1629. * encoding of the local system or the document.
  1630. * <p>
  1631. * The following line is the regular expression for breaking-down a URI
  1632. * reference into its components.
  1633. * <p><blockquote><pre>
  1634. * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
  1635. * 12 3 4 5 6 7 8 9
  1636. * </pre></blockquote><p>
  1637. * For example, matching the above expression to
  1638. * http://jakarta.apache.org/ietf/uri/#Related
  1639. * results in the following subexpression matches:
  1640. * <p><blockquote><pre>
  1641. * $1 = http:
  1642. * scheme = $2 = http
  1643. * $3 = //jakarta.apache.org
  1644. * authority = $4 = jakarta.apache.org
  1645. * path = $5 = /ietf/uri/
  1646. * $6 = <undefined>
  1647. * query = $7 = <undefined>
  1648. * $8 = #Related
  1649. * fragment = $9 = Related
  1650. * </pre></blockquote><p>
  1651. *
  1652. * @param original the original character sequence
  1653. * @param escaped <code>true</code> if <code>original</code> is escaped
  1654. * @throws URIException If an error occurs.
  1655. */
  1656. protected void parseUriReference(String original, boolean escaped)
  1657. throws URIException {
  1658. // validate and contruct the URI character sequence
  1659. if (original == null) {
  1660. throw new URIException("URI-Reference required");
  1661. }
  1662. /* @
  1663. * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
  1664. */
  1665. String tmp = original.trim();
  1666. /*
  1667. * The length of the string sequence of characters.
  1668. * It may not be equal to the length of the byte array.
  1669. */
  1670. int length = tmp.length();
  1671. /*
  1672. * Remove the delimiters like angle brackets around an URI.
  1673. */
  1674. if (length > 0) {
  1675. char[] firstDelimiter = { tmp.charAt(0) };
  1676. if (validate(firstDelimiter, delims)) {
  1677. if (length >= 2) {
  1678. char[] lastDelimiter = { tmp.charAt(length - 1) };
  1679. if (validate(lastDelimiter, delims)) {
  1680. tmp = tmp.substring(1, length - 1);
  1681. length = length - 2;
  1682. }
  1683. }
  1684. }
  1685. }
  1686. /*
  1687. * The starting index
  1688. */
  1689. int from = 0;
  1690. /*
  1691. * The test flag whether the URI is started from the path component.
  1692. */
  1693. boolean isStartedFromPath = false;
  1694. int atColon = tmp.indexOf(':');
  1695. int atSlash = tmp.indexOf('/');
  1696. if (atColon < 0 || (atSlash >= 0 && atSlash < atColon)) {
  1697. isStartedFromPath = true;
  1698. }
  1699. /*
  1700. * <p><blockquote><pre>
  1701. * @@@@@@@@
  1702. * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
  1703. * </pre></blockquote><p>
  1704. */
  1705. int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from);
  1706. if (at == -1) {
  1707. at = 0;
  1708. }
  1709. /*
  1710. * Parse the scheme.
  1711. * <p><blockquote><pre>
  1712. * scheme = $2 = http
  1713. * @
  1714. * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
  1715. * </pre></blockquote><p>
  1716. */
  1717. if (at < length && tmp.charAt(at) == ':') {
  1718. char[] target = tmp.substring(0, at).toLowerCase().toCharArray();
  1719. if (validate(target, scheme)) {
  1720. _scheme = target;
  1721. } else {
  1722. throw new URIException("incorrect scheme");
  1723. }
  1724. from = ++at;
  1725. }
  1726. /*
  1727. * Parse the authority component.
  1728. * <p><blockquote><pre>
  1729. * authority = $4 = jakarta.apache.org
  1730. * @@
  1731. * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
  1732. * </pre></blockquote><p>
  1733. */
  1734. // Reset flags
  1735. _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false;
  1736. if (0 <= at && at < length && tmp.charAt(at) == '/') {
  1737. // Set flag
  1738. _is_hier_part = true;
  1739. if (at + 2 < length && tmp.charAt(at + 1) == '/') {
  1740. // the temporary index to start the search from
  1741. int next = indexFirstOf(tmp, "/?#", at + 2);
  1742. if (next == -1) {
  1743. next = (tmp.substring(at + 2).length() == 0) ? at + 2
  1744. : tmp.length();
  1745. }
  1746. parseAuthority(tmp.substring(at + 2, next), escaped);
  1747. from = at = next;
  1748. // Set flag
  1749. _is_net_path = true;
  1750. }
  1751. if (from == at) {
  1752. // Set flag
  1753. _is_abs_path = true;
  1754. }
  1755. }
  1756. /*
  1757. * Parse the path component.
  1758. * <p><blockquote><pre>
  1759. * path = $5 = /ietf/uri/
  1760. * @@@@@@
  1761. * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
  1762. * </pre></blockquote><p>
  1763. */
  1764. if (from < length) {
  1765. // rel_path = rel_segment [ abs_path ]
  1766. int next = indexFirstOf(tmp, "?#", from);
  1767. if (next == -1) {
  1768. next = tmp.length();
  1769. }
  1770. if (!_is_abs_path) {
  1771. if (!escaped
  1772. && prevalidate(tmp.substring(from, next), disallowed_rel_path)
  1773. || escaped
  1774. && validate(tmp.substring(from, next).toCharArray(), rel_path)) {
  1775. // Set flag
  1776. _is_rel_path = true;
  1777. } else if (!escaped
  1778. && prevalidate(tmp.substring(from, next), disallowed_opaque_part)
  1779. || escaped
  1780. && validate(tmp.substring(from, next).toCharArray(), opaque_part)) {
  1781. // Set flag
  1782. _is_opaque_part = true;
  1783. } else {
  1784. // the path component may be empty
  1785. _path = null;
  1786. }
  1787. }
  1788. if (escaped) {
  1789. setRawPath(tmp.substring(from, next).toCharArray());
  1790. } else {
  1791. setPath(tmp.substring(from, next));
  1792. }
  1793. at = next;
  1794. }
  1795. // set the charset to do escape encoding
  1796. String charset = getProtocolCharset();
  1797. /*
  1798. * Parse the query component.
  1799. * <p><blockquote><pre>
  1800. * query = $7 = <undefined>
  1801. * @@@@@@@@@
  1802. * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
  1803. * </pre></blockquote><p>
  1804. */
  1805. if (0 <= at && at + 1 < length && tmp.charAt(at) == '?') {
  1806. int next = tmp.indexOf('#', at + 1);
  1807. if (next == -1) {
  1808. next = tmp.length();
  1809. }
  1810. _query = (escaped) ? tmp.substring(at + 1, next).toCharArray()
  1811. : encode(tmp.substring(at + 1, next), allowed_query, charset);
  1812. at = next;
  1813. }
  1814. /*
  1815. * Parse the fragment component.
  1816. * <p><blockquote><pre>
  1817. * fragment = $9 = Related
  1818. * @@@@@@@@
  1819. * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
  1820. * </pre></blockquote><p>
  1821. */
  1822. if (0 <= at && at + 1 <= length && tmp.charAt(at) == '#') {
  1823. if (at + 1 == length) { // empty fragment
  1824. _fragment = "".toCharArray();
  1825. } else {
  1826. _fragment = (escaped) ? tmp.substring(at + 1).toCharArray()
  1827. : encode(tmp.substring(at + 1), allowed_fragment, charset);
  1828. }
  1829. }
  1830. // set this URI.
  1831. setURI();
  1832. }
  1833. /**
  1834. * Get the earlier index that to be searched for the first occurrance in
  1835. * one of any of the given string.
  1836. *
  1837. * @param s the string to be indexed
  1838. * @param delims the delimiters used to index
  1839. * @return the earlier index if there are delimiters
  1840. */
  1841. protected int indexFirstOf(String s, String delims) {
  1842. return indexFirstOf(s, delims, -1);
  1843. }
  1844. /**
  1845. * Get the earlier index that to be searched for the first occurrance in
  1846. * one of any of the given string.
  1847. *
  1848. * @param s the string to be indexed
  1849. * @param delims the delimiters used to index
  1850. * @param offset the from index
  1851. * @return the earlier index if there are delimiters
  1852. */
  1853. protected int indexFirstOf(String s, String delims, int offset) {
  1854. if (s == null || s.length() == 0) {
  1855. return -1;
  1856. }
  1857. if (delims == null || delims.length() == 0) {
  1858. return -1;
  1859. }
  1860. // check boundaries
  1861. if (offset < 0) {
  1862. offset = 0;
  1863. } else if (offset > s.length()) {
  1864. return -1;
  1865. }
  1866. // s is never null
  1867. int min = s.length();
  1868. char[] delim = delims.toCharArray();
  1869. for (int i = 0; i < delim.length; i++) {
  1870. int at = s.indexOf(delim[i], offset);
  1871. if (at >= 0 && at < min) {
  1872. min = at;
  1873. }
  1874. }
  1875. return (min == s.length()) ? -1 : min;
  1876. }
  1877. /**
  1878. * Get the earlier index that to be searched for the first occurrance in
  1879. * one of any of the given array.
  1880. *
  1881. * @param s the character array to be indexed
  1882. * @param delim the delimiter used to index
  1883. * @return the ealier index if there are a delimiter
  1884. */
  1885. protected int indexFirstOf(char[] s, char delim) {
  1886. return indexFirstOf(s, delim, 0);
  1887. }
  1888. /**
  1889. * Get the earlier index that to be searched for the first occurrance in
  1890. * one of any of the given array.
  1891. *
  1892. * @param s the character array to be indexed
  1893. * @param delim the delimiter used to index
  1894. * @param offset The offset.
  1895. * @return the ealier index if there is a delimiter
  1896. */
  1897. protected int indexFirstOf(char[] s, char delim, int offset) {
  1898. if (s == null || s.length == 0) {
  1899. return -1;
  1900. }
  1901. // check boundaries
  1902. if (offset < 0) {
  1903. offset = 0;
  1904. } else if (offset > s.length) {
  1905. return -1;
  1906. }
  1907. for (int i = offset; i < s.length; i++) {
  1908. if (s[i] == delim) {
  1909. return i;
  1910. }
  1911. }
  1912. return -1;
  1913. }
  1914. /**
  1915. * Parse the authority component.
  1916. *
  1917. * @param original the original character sequence of authority component
  1918. * @param escaped <code>true</code> if <code>original</code> is escaped
  1919. * @throws URIException If an error occurs.
  1920. */
  1921. protected void parseAuthority(String original, boolean escaped)
  1922. throws URIException {
  1923. // Reset flags
  1924. _is_reg_name = _is_server =
  1925. _is_hostname = _is_IPv4address = _is_IPv6reference = false;
  1926. // set the charset to do escape encoding
  1927. String charset = getProtocolCharset();
  1928. boolean hasPort = true;
  1929. int from = 0;
  1930. int next = original.indexOf('@');
  1931. if (next != -1) { // neither -1 and 0
  1932. // each protocol extented from URI supports the specific userinfo
  1933. _userinfo = (escaped) ? original.substring(0, next).toCharArray()
  1934. : encode(original.substring(0, next), allowed_userinfo,
  1935. charset);
  1936. from = next + 1;
  1937. }
  1938. next = original.indexOf('[', from);
  1939. if (next >= from) {
  1940. next = original.indexOf(']', from);
  1941. if (next == -1) {
  1942. throw new URIException(URIException.PARSING, "IPv6reference");
  1943. } else {
  1944. next++;
  1945. }
  1946. // In IPv6reference, '[', ']' should be excluded
  1947. _host = (escaped) ? original.substring(from, next).toCharArray()
  1948. : encode(original.substring(from, next), allowed_IPv6reference,
  1949. charset);
  1950. // Set flag
  1951. _is_IPv6reference = true;
  1952. } else { // only for !_is_IPv6reference
  1953. next = original.indexOf(':', from);
  1954. if (next == -1) {
  1955. next = original.length();
  1956. hasPort = false;
  1957. }
  1958. // REMINDME: it doesn't need the pre-validation
  1959. _host = original.substring(from, next).toCharArray();
  1960. if (validate(_host, IPv4address)) {
  1961. // Set flag
  1962. _is_IPv4address = true;
  1963. } else if (validate(_host, hostname)) {
  1964. // Set flag
  1965. _is_hostname = true;
  1966. } else {
  1967. // Set flag
  1968. _is_reg_name = true;
  1969. }
  1970. }
  1971. if (_is_reg_name) {
  1972. // Reset flags for a server-based naming authority
  1973. _is_server = _is_hostname = _is_IPv4address =
  1974. _is_IPv6reference = false;
  1975. // set a registry-based naming authority
  1976. _authority = (escaped) ? original.toString().toCharArray()
  1977. : encode(original.toString(), allowed_reg_name, charset);
  1978. } else {
  1979. if (original.length() - 1 > next && hasPort
  1980. && original.charAt(next) == ':') { // not empty
  1981. from = next + 1;
  1982. try {
  1983. _port = Integer.parseInt(original.substring(from));
  1984. } catch (NumberFormatException error) {
  1985. throw new URIException(URIException.PARSING,
  1986. "invalid port number");
  1987. }
  1988. }
  1989. // set a server-based naming authority
  1990. StringBuffer buf = new StringBuffer();
  1991. if (_userinfo != null) { // has_userinfo
  1992. buf.append(_userinfo);
  1993. buf.append('@');
  1994. }
  1995. if (_host != null) {
  1996. buf.append(_host);
  1997. if (_port != -1) {
  1998. buf.append(':');
  1999. buf.append(_port);
  2000. }
  2001. }
  2002. _authority = buf.toString().toCharArray();
  2003. // Set flag
  2004. _is_server = true;
  2005. }
  2006. }
  2007. /**
  2008. * Once it's parsed successfully, set this URI.
  2009. *
  2010. * @see #getRawURI
  2011. */
  2012. protected void setURI() {
  2013. // set _uri
  2014. StringBuffer buf = new StringBuffer();
  2015. // ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
  2016. if (_scheme != null) {
  2017. buf.append(_scheme);
  2018. buf.append(':');
  2019. }
  2020. if (_is_net_path) {
  2021. buf.append("//");
  2022. if (_authority != null) { // has_authority
  2023. if (_userinfo != null) { // by default, remove userinfo part
  2024. if (_host != null) {
  2025. buf.append(_host);
  2026. if (_port != -1) {
  2027. buf.append(':');
  2028. buf.append(_port);
  2029. }
  2030. }
  2031. } else {
  2032. buf.append(_authority);
  2033. }
  2034. }
  2035. }
  2036. if (_opaque != null && _is_opaque_part) {
  2037. buf.append(_opaque);
  2038. } else if (_path != null) {
  2039. // _is_hier_part or _is_relativeURI
  2040. if (_path.length != 0) {
  2041. buf.append(_path);
  2042. }
  2043. }
  2044. if (_query != null) { // has_query
  2045. buf.append('?');
  2046. buf.append(_query);
  2047. }
  2048. // ignore the fragment identifier
  2049. _uri = buf.toString().toCharArray();
  2050. hash = 0;
  2051. }
  2052. // ----------------------------------------------------------- Test methods
  2053. /**
  2054. * Tell whether or not this URI is absolute.
  2055. *
  2056. * @return true iif this URI is absoluteURI
  2057. */
  2058. public boolean isAbsoluteURI() {
  2059. return (_scheme != null);
  2060. }
  2061. /**
  2062. * Tell whether or not this URI is relative.
  2063. *
  2064. * @return true iif this URI is relativeURI
  2065. */
  2066. public boolean isRelativeURI() {
  2067. return (_scheme == null);
  2068. }
  2069. /**
  2070. * Tell whether or not the absoluteURI of this URI is hier_part.
  2071. *
  2072. * @return true iif the absoluteURI is hier_part
  2073. */
  2074. public boolean isHierPart() {
  2075. return _is_hier_part;
  2076. }
  2077. /**
  2078. * Tell whether or not the absoluteURI of this URI is opaque_part.
  2079. *
  2080. * @return true iif the absoluteURI is opaque_part
  2081. */
  2082. public boolean isOpaquePart() {
  2083. return _is_opaque_part;
  2084. }
  2085. /**
  2086. * Tell whether or not the relativeURI or heir_part of this URI is net_path.
  2087. * It's the same function as the has_authority() method.
  2088. *
  2089. * @return true iif the relativeURI or heir_part is net_path
  2090. * @see #hasAuthority
  2091. */
  2092. public boolean isNetPath() {
  2093. return _is_net_path || (_authority != null);
  2094. }
  2095. /**
  2096. * Tell whether or not the relativeURI or hier_part of this URI is abs_path.
  2097. *
  2098. * @return true iif the relativeURI or hier_part is abs_path
  2099. */
  2100. public boolean isAbsPath() {
  2101. return _is_abs_path;
  2102. }
  2103. /**
  2104. * Tell whether or not the relativeURI of this URI is rel_path.
  2105. *
  2106. * @return true iif the relativeURI is rel_path
  2107. */
  2108. public boolean isRelPath() {
  2109. return _is_rel_path;
  2110. }
  2111. /**
  2112. * Tell whether or not this URI has authority.
  2113. * It's the same function as the is_net_path() method.
  2114. *
  2115. * @return true iif this URI has authority
  2116. * @see #isNetPath
  2117. */
  2118. public boolean hasAuthority() {
  2119. return (_authority != null) || _is_net_path;
  2120. }
  2121. /**
  2122. * Tell whether or not the authority component of this URI is reg_name.
  2123. *
  2124. * @return true iif the authority component is reg_name
  2125. */
  2126. public boolean isRegName() {
  2127. return _is_reg_name;
  2128. }
  2129. /**
  2130. * Tell whether or not the authority component of this URI is server.
  2131. *
  2132. * @return true iif the authority component is server
  2133. */
  2134. public boolean isServer() {
  2135. return _is_server;
  2136. }
  2137. /**
  2138. * Tell whether or not this URI has userinfo.
  2139. *
  2140. * @return true iif this URI has userinfo
  2141. */
  2142. public boolean hasUserinfo() {
  2143. return (_userinfo != null);
  2144. }
  2145. /**
  2146. * Tell whether or not the host part of this URI is hostname.
  2147. *
  2148. * @return true iif the host part is hostname
  2149. */
  2150. public boolean isHostname() {
  2151. return _is_hostname;
  2152. }
  2153. /**
  2154. * Tell whether or not the host part of this URI is IPv4address.
  2155. *
  2156. * @return true iif the host part is IPv4address
  2157. */
  2158. public boolean isIPv4address() {
  2159. return _is_IPv4address;
  2160. }
  2161. /**
  2162. * Tell whether or not the host part of this URI is IPv6reference.
  2163. *
  2164. * @return true iif the host part is IPv6reference
  2165. */
  2166. public boolean isIPv6reference() {
  2167. return _is_IPv6reference;
  2168. }
  2169. /**
  2170. * Tell whether or not this URI has query.
  2171. *
  2172. * @return true iif this URI has query
  2173. */
  2174. public boolean hasQuery() {
  2175. return (_query != null);
  2176. }
  2177. /**
  2178. * Tell whether or not this URI has fragment.
  2179. *
  2180. * @return true iif this URI has fragment
  2181. */
  2182. public boolean hasFragment() {
  2183. return (_fragment != null);
  2184. }
  2185. // ---------------------------------------------------------------- Charset
  2186. /**
  2187. * Set the default charset of the protocol.
  2188. * <p>
  2189. * The character set used to store files SHALL remain a local decision and
  2190. * MAY depend on the capability of local operating systems. Prior to the
  2191. * exchange of URIs they SHOULD be converted into a ISO/IEC 10646 format
  2192. * and UTF-8 encoded. This approach, while allowing international exchange
  2193. * of URIs, will still allow backward compatibility with older systems
  2194. * because the code set positions for ASCII characters are identical to the
  2195. * one byte sequence in UTF-8.
  2196. * <p>
  2197. * An individual URI scheme may require a single charset, define a default
  2198. * charset, or provide a way to indicate the charset used.
  2199. *
  2200. * <p>
  2201. * Always all the time, the setter method is always succeeded and throws
  2202. * <code>DefaultCharsetChanged</code> exception.
  2203. *
  2204. * So API programmer must follow the following way:
  2205. * <code><pre>
  2206. * import org.apache.util.URI$DefaultCharsetChanged;
  2207. * .
  2208. * .
  2209. * .
  2210. * try {
  2211. * URI.setDefaultProtocolCharset("UTF-8");
  2212. * } catch (DefaultCharsetChanged cc) {
  2213. * // CASE 1: the exception could be ignored, when it is set by user
  2214. * if (cc.getReasonCode() == DefaultCharsetChanged.PROTOCOL_CHARSET) {
  2215. * // CASE 2: let user know the default protocol charset changed
  2216. * } else {
  2217. * // CASE 2: let user know the default document charset changed
  2218. * }
  2219. * }
  2220. * </pre></code>
  2221. *
  2222. * The API programmer is responsible to set the correct charset.
  2223. * And each application should remember its own charset to support.
  2224. *
  2225. * @param charset the default charset for each protocol
  2226. * @throws DefaultCharsetChanged default charset changed
  2227. */
  2228. public static void setDefaultProtocolCharset(String charset)
  2229. throws DefaultCharsetChanged {
  2230. defaultProtocolCharset = charset;
  2231. throw new DefaultCharsetChanged(DefaultCharsetChanged.PROTOCOL_CHARSET,
  2232. "the default protocol charset changed");
  2233. }
  2234. /**
  2235. * Get the default charset of the protocol.
  2236. * <p>
  2237. * An individual URI scheme may require a single charset, define a default
  2238. * charset, or provide a way to indicate the charset used.
  2239. * <p>
  2240. * To work globally either requires support of a number of character sets
  2241. * and to be able to convert between them, or the use of a single preferred
  2242. * character set.
  2243. * For support of global compatibility it is STRONGLY RECOMMENDED that
  2244. * clients and servers use UTF-8 encoding when exchanging URIs.
  2245. *
  2246. * @return the default charset string
  2247. */
  2248. public static String getDefaultProtocolCharset() {
  2249. return defaultProtocolCharset;
  2250. }
  2251. /**
  2252. * Get the protocol charset used by this current URI instance.
  2253. * It was set by the constructor for this instance. If it was not set by
  2254. * contructor, it will return the default protocol charset.
  2255. *
  2256. * @return the protocol charset string
  2257. * @see #getDefaultProtocolCharset
  2258. */
  2259. public String getProtocolCharset() {
  2260. return (protocolCharset != null)
  2261. ? protocolCharset
  2262. : defaultProtocolCharset;
  2263. }
  2264. /**
  2265. * Set the default charset of the document.
  2266. * <p>
  2267. * Notice that it will be possible to contain mixed characters (e.g.
  2268. * ftp://host/KoreanNamespace/ChineseResource). To handle the Bi-directional
  2269. * display of these character sets, the protocol charset could be simply
  2270. * used again. Because it's not yet implemented that the insertion of BIDI
  2271. * control characters at different points during composition is extracted.
  2272. * <p>
  2273. *
  2274. * Always all the time, the setter method is always succeeded and throws
  2275. * <code>DefaultCharsetChanged</code> exception.
  2276. *
  2277. * So API programmer must follow the following way:
  2278. * <code><pre>
  2279. * import org.apache.util.URI$DefaultCharsetChanged;
  2280. * .
  2281. * .
  2282. * .
  2283. * try {
  2284. * URI.setDefaultDocumentCharset("EUC-KR");
  2285. * } catch (DefaultCharsetChanged cc) {
  2286. * // CASE 1: the exception could be ignored, when it is set by user
  2287. * if (cc.getReasonCode() == DefaultCharsetChanged.DOCUMENT_CHARSET) {
  2288. * // CASE 2: let user know the default document charset changed
  2289. * } else {
  2290. * // CASE 2: let user know the default protocol charset changed
  2291. * }
  2292. * }
  2293. * </pre></code>
  2294. *
  2295. * The API programmer is responsible to set the correct charset.
  2296. * And each application should remember its own charset to support.
  2297. *
  2298. * @param charset the default charset for the document
  2299. * @throws DefaultCharsetChanged default charset changed
  2300. */
  2301. public static void setDefaultDocumentCharset(String charset)
  2302. throws DefaultCharsetChanged {
  2303. defaultDocumentCharset = charset;
  2304. throw new DefaultCharsetChanged(DefaultCharsetChanged.DOCUMENT_CHARSET,
  2305. "the default document charset changed");
  2306. }
  2307. /**
  2308. * Get the recommended default charset of the document.
  2309. *
  2310. * @return the default charset string
  2311. */
  2312. public static String getDefaultDocumentCharset() {
  2313. return defaultDocumentCharset;
  2314. }
  2315. /**
  2316. * Get the default charset of the document by locale.
  2317. *
  2318. * @return the default charset string by locale
  2319. */
  2320. public static String getDefaultDocumentCharsetByLocale() {
  2321. return defaultDocumentCharsetByLocale;
  2322. }
  2323. /**
  2324. * Get the default charset of the document by platform.
  2325. *
  2326. * @return the default charset string by platform
  2327. */
  2328. public static String getDefaultDocumentCharsetByPlatform() {
  2329. return defaultDocumentCharsetByPlatform;
  2330. }
  2331. // ------------------------------------------------------------- The scheme
  2332. /**
  2333. * Get the scheme.
  2334. *
  2335. * @return the scheme
  2336. */
  2337. public char[] getRawScheme() {
  2338. return _scheme;
  2339. }
  2340. /**
  2341. * Get the scheme.
  2342. *
  2343. * @return the scheme
  2344. * null if undefined scheme
  2345. */
  2346. public String getScheme() {
  2347. return (_scheme == null) ? null : new String(_scheme);
  2348. }
  2349. // ---------------------------------------------------------- The authority
  2350. /**
  2351. * Set the authority. It can be one type of server, hostport, hostname,
  2352. * IPv4address, IPv6reference and reg_name.
  2353. * <p><blockquote><pre>
  2354. * authority = server | reg_name
  2355. * </pre></blockquote><p>
  2356. *
  2357. * @param escapedAuthority the raw escaped authority
  2358. * @throws URIException If {@link
  2359. * #parseAuthority(java.lang.String,boolean)} fails
  2360. * @throws NullPointerException null authority
  2361. */
  2362. public void setRawAuthority(char[] escapedAuthority)
  2363. throws URIException, NullPointerException {
  2364. parseAuthority(new String(escapedAuthority), true);
  2365. setURI();
  2366. }
  2367. /**
  2368. * Set the authority. It can be one type of server, hostport, hostname,
  2369. * IPv4address, IPv6reference and reg_name.
  2370. * Note that there is no setAuthority method by the escape encoding reason.
  2371. *
  2372. * @param escapedAuthority the escaped authority string
  2373. * @throws URIException If {@link
  2374. * #parseAuthority(java.lang.String,boolean)} fails
  2375. */
  2376. public void setEscapedAuthority(String escapedAuthority)
  2377. throws URIException {
  2378. parseAuthority(escapedAuthority, true);
  2379. setURI();
  2380. }
  2381. /**
  2382. * Get the raw-escaped authority.
  2383. *
  2384. * @return the raw-escaped authority
  2385. */
  2386. public char[] getRawAuthority() {
  2387. return _authority;
  2388. }
  2389. /**
  2390. * Get the escaped authority.
  2391. *
  2392. * @return the escaped authority
  2393. */
  2394. public String getEscapedAuthority() {
  2395. return (_authority == null) ? null : new String(_authority);
  2396. }
  2397. /**
  2398. * Get the authority.
  2399. *
  2400. * @return the authority
  2401. * @throws URIException If {@link #decode} fails
  2402. */
  2403. public String getAuthority() throws URIException {
  2404. return (_authority == null) ? null : decode(_authority,
  2405. getProtocolCharset());
  2406. }
  2407. // ----------------------------------------------------------- The userinfo
  2408. /**
  2409. * Get the raw-escaped userinfo.
  2410. *
  2411. * @return the raw-escaped userinfo
  2412. * @see #getAuthority
  2413. */
  2414. public char[] getRawUserinfo() {
  2415. return _userinfo;
  2416. }
  2417. /**
  2418. * Get the escaped userinfo.
  2419. *
  2420. * @return the escaped userinfo
  2421. * @see #getAuthority
  2422. */
  2423. public String getEscapedUserinfo() {
  2424. return (_userinfo == null) ? null : new String(_userinfo);
  2425. }
  2426. /**
  2427. * Get the userinfo.
  2428. *
  2429. * @return the userinfo
  2430. * @throws URIException If {@link #decode} fails
  2431. * @see #getAuthority
  2432. */
  2433. public String getUserinfo() throws URIException {
  2434. return (_userinfo == null) ? null : decode(_userinfo,
  2435. getProtocolCharset());
  2436. }
  2437. // --------------------------------------------------------------- The host
  2438. /**
  2439. * Get the host.
  2440. * <p><blockquote><pre>
  2441. * host = hostname | IPv4address | IPv6reference
  2442. * </pre></blockquote><p>
  2443. *
  2444. * @return the host
  2445. * @see #getAuthority
  2446. */
  2447. public char[] getRawHost() {
  2448. return _host;
  2449. }
  2450. /**
  2451. * Get the host.
  2452. * <p><blockquote><pre>
  2453. * host = hostname | IPv4address | IPv6reference
  2454. * </pre></blockquote><p>
  2455. *
  2456. * @return the host
  2457. * @throws URIException If {@link #decode} fails
  2458. * @see #getAuthority
  2459. */
  2460. public String getHost() throws URIException {
  2461. return decode(_host, getProtocolCharset());
  2462. }
  2463. // --------------------------------------------------------------- The port
  2464. /**
  2465. * Get the port. In order to get the specfic default port, the specific
  2466. * protocol-supported class extended from the URI class should be used.
  2467. * It has the server-based naming authority.
  2468. *
  2469. * @return the port
  2470. * if -1, it has the default port for the scheme or the server-based
  2471. * naming authority is not supported in the specific URI.
  2472. */
  2473. public int getPort() {
  2474. return _port;
  2475. }
  2476. // --------------------------------------------------------------- The path
  2477. /**
  2478. * Set the raw-escaped path.
  2479. *
  2480. * @param escapedPath the path character sequence
  2481. * @throws URIException encoding error or not proper for initial instance
  2482. * @see #encode
  2483. */
  2484. public void setRawPath(char[] escapedPath) throws URIException {
  2485. if (escapedPath == null || escapedPath.length == 0) {
  2486. _path = _opaque = escapedPath;
  2487. setURI();
  2488. return;
  2489. }
  2490. // remove the fragment identifier
  2491. escapedPath = removeFragmentIdentifier(escapedPath);
  2492. if (_is_net_path || _is_abs_path) {
  2493. if (escapedPath[0] != '/') {
  2494. throw new URIException(URIException.PARSING,
  2495. "not absolute path");
  2496. }
  2497. if (!validate(escapedPath, abs_path)) {
  2498. throw new URIException(URIException.ESCAPING,
  2499. "escaped absolute path not valid");
  2500. }
  2501. _path = escapedPath;
  2502. } else if (_is_rel_path) {
  2503. int at = indexFirstOf(escapedPath, '/');
  2504. if (at == 0) {
  2505. throw new URIException(URIException.PARSING, "incorrect path");
  2506. }
  2507. if (at > 0 && !validate(escapedPath, 0, at - 1, rel_segment)
  2508. && !validate(escapedPath, at, -1, abs_path)
  2509. || at < 0 && !validate(escapedPath, 0, -1, rel_segment)) {
  2510. throw new URIException(URIException.ESCAPING,
  2511. "escaped relative path not valid");
  2512. }
  2513. _path = escapedPath;
  2514. } else if (_is_opaque_part) {
  2515. if (!uric_no_slash.get(escapedPath[0])
  2516. && !validate(escapedPath, 1, -1, uric)) {
  2517. throw new URIException(URIException.ESCAPING,
  2518. "escaped opaque part not valid");
  2519. }
  2520. _opaque = escapedPath;
  2521. } else {
  2522. throw new URIException(URIException.PARSING, "incorrect path");
  2523. }
  2524. setURI();
  2525. }
  2526. /**
  2527. * Set the escaped path.
  2528. *
  2529. * @param escapedPath the escaped path string
  2530. * @throws URIException encoding error or not proper for initial instance
  2531. * @see #encode
  2532. */
  2533. public void setEscapedPath(String escapedPath) throws URIException {
  2534. if (escapedPath == null) {
  2535. _path = _opaque = null;
  2536. setURI();
  2537. return;
  2538. }
  2539. setRawPath(escapedPath.toCharArray());
  2540. }
  2541. /**
  2542. * Set the path.
  2543. *
  2544. * @param path the path string
  2545. * @throws URIException set incorrectly or fragment only
  2546. * @see #encode
  2547. */
  2548. public void setPath(String path) throws URIException {
  2549. if (path == null || path.length() == 0) {
  2550. _path = _opaque = (path == null) ? null : path.toCharArray();
  2551. setURI();
  2552. return;
  2553. }
  2554. // set the charset to do escape encoding
  2555. String charset = getProtocolCharset();
  2556. if (_is_net_path || _is_abs_path) {
  2557. _path = encode(path, allowed_abs_path, charset);
  2558. } else if (_is_rel_path) {
  2559. StringBuffer buff = new StringBuffer(path.length());
  2560. int at = path.indexOf('/');
  2561. if (at == 0) { // never 0
  2562. throw new URIException(URIException.PARSING,
  2563. "incorrect relative path");
  2564. }
  2565. if (at > 0) {
  2566. buff.append(encode(path.substring(0, at), allowed_rel_path,
  2567. charset));
  2568. buff.append(encode(path.substring(at), allowed_abs_path,
  2569. charset));
  2570. } else {
  2571. buff.append(encode(path, allowed_rel_path, charset));
  2572. }
  2573. _path = buff.toString().toCharArray();
  2574. } else if (_is_opaque_part) {
  2575. StringBuffer buf = new StringBuffer();
  2576. buf.insert(0, encode(path.substring(0, 1), uric_no_slash, charset));
  2577. buf.insert(1, encode(path.substring(1), uric, charset));
  2578. _opaque = buf.toString().toCharArray();
  2579. } else {
  2580. throw new URIException(URIException.PARSING, "incorrect path");
  2581. }
  2582. setURI();
  2583. }
  2584. /**
  2585. * Resolve the base and relative path.
  2586. *
  2587. * @param basePath a character array of the basePath
  2588. * @param relPath a character array of the relPath
  2589. * @return the resolved path
  2590. * @throws URIException no more higher path level to be resolved
  2591. */
  2592. protected char[] resolvePath(char[] basePath, char[] relPath)
  2593. throws URIException {
  2594. // REMINDME: paths are never null
  2595. String base = (basePath == null) ? "" : new String(basePath);
  2596. int at = base.lastIndexOf('/');
  2597. if (at != -1) {
  2598. basePath = base.substring(0, at + 1).toCharArray();
  2599. }
  2600. // _path could be empty
  2601. if (relPath == null || relPath.length == 0) {
  2602. return normalize(basePath);
  2603. } else if (relPath[0] == '/') {
  2604. return normalize(relPath);
  2605. } else {
  2606. StringBuffer buff = new StringBuffer(base.length()
  2607. + relPath.length);
  2608. buff.append((at != -1) ? base.substring(0, at + 1) : "/");
  2609. buff.append(relPath);
  2610. return normalize(buff.toString().toCharArray());
  2611. }
  2612. }
  2613. /**
  2614. * Get the raw-escaped current hierarchy level in the given path.
  2615. * If the last namespace is a collection, the slash mark ('/') should be
  2616. * ended with at the last character of the path string.
  2617. *
  2618. * @param path the path
  2619. * @return the current hierarchy level
  2620. * @throws URIException no hierarchy level
  2621. */
  2622. protected char[] getRawCurrentHierPath(char[] path) throws URIException {
  2623. if (_is_opaque_part) {
  2624. throw new URIException(URIException.PARSING, "no hierarchy level");
  2625. }
  2626. if (path == null) {
  2627. throw new URIException(URIException.PARSING, "empty path");
  2628. }
  2629. String buff = new String(path);
  2630. int first = buff.indexOf('/');
  2631. int last = buff.lastIndexOf('/');
  2632. if (last == 0) {
  2633. return rootPath;
  2634. } else if (first != last && last != -1) {
  2635. return buff.substring(0, last).toCharArray();
  2636. }
  2637. // FIXME: it could be a document on the server side
  2638. return path;
  2639. }
  2640. /**
  2641. * Get the raw-escaped current hierarchy level.
  2642. *
  2643. * @return the raw-escaped current hierarchy level
  2644. * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
  2645. */
  2646. public char[] getRawCurrentHierPath() throws URIException {
  2647. return (_path == null) ? null : getRawCurrentHierPath(_path);
  2648. }
  2649. /**
  2650. * Get the escaped current hierarchy level.
  2651. *
  2652. * @return the escaped current hierarchy level
  2653. * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
  2654. */
  2655. public String getEscapedCurrentHierPath() throws URIException {
  2656. char[] path = getRawCurrentHierPath();
  2657. return (path == null) ? null : new String(path);
  2658. }
  2659. /**
  2660. * Get the current hierarchy level.
  2661. *
  2662. * @return the current hierarchy level
  2663. * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
  2664. * @see #decode
  2665. */
  2666. public String getCurrentHierPath() throws URIException {
  2667. char[] path = getRawCurrentHierPath();
  2668. return (path == null) ? null : decode(path, getProtocolCharset());
  2669. }
  2670. /**
  2671. * Get the level above the this hierarchy level.
  2672. *
  2673. * @return the raw above hierarchy level
  2674. * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
  2675. */
  2676. public char[] getRawAboveHierPath() throws URIException {
  2677. char[] path = getRawCurrentHierPath();
  2678. return (path == null) ? null : getRawCurrentHierPath(path);
  2679. }
  2680. /**
  2681. * Get the level above the this hierarchy level.
  2682. *
  2683. * @return the raw above hierarchy level
  2684. * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
  2685. */
  2686. public String getEscapedAboveHierPath() throws URIException {
  2687. char[] path = getRawAboveHierPath();
  2688. return (path == null) ? null : new String(path);
  2689. }
  2690. /**
  2691. * Get the level above the this hierarchy level.
  2692. *
  2693. * @return the above hierarchy level
  2694. * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
  2695. * @see #decode
  2696. */
  2697. public String getAboveHierPath() throws URIException {
  2698. char[] path = getRawAboveHierPath();
  2699. return (path == null) ? null : decode(path, getProtocolCharset());
  2700. }
  2701. /**
  2702. * Get the raw-escaped path.
  2703. * <p><blockquote><pre>
  2704. * path = [ abs_path | opaque_part ]
  2705. * </pre></blockquote><p>
  2706. *
  2707. * @return the raw-escaped path
  2708. */
  2709. public char[] getRawPath() {
  2710. return _is_opaque_part ? _opaque : _path;
  2711. }
  2712. /**
  2713. * Get the escaped path.
  2714. * <p><blockquote><pre>
  2715. * path = [ abs_path | opaque_part ]
  2716. * abs_path = "/" path_segments
  2717. * opaque_part = uric_no_slash *uric
  2718. * </pre></blockquote><p>
  2719. *
  2720. * @return the escaped path string
  2721. */
  2722. public String getEscapedPath() {
  2723. char[] path = getRawPath();
  2724. return (path == null) ? null : new String(path);
  2725. }
  2726. /**
  2727. * Get the path.
  2728. * <p><blockquote><pre>
  2729. * path = [ abs_path | opaque_part ]
  2730. * </pre></blockquote><p>
  2731. * @return the path string
  2732. * @throws URIException If {@link #decode} fails.
  2733. * @see #decode
  2734. */
  2735. public String getPath() throws URIException {
  2736. char[] path = getRawPath();
  2737. return (path == null) ? null : decode(path, getProtocolCharset());
  2738. }
  2739. /**
  2740. * Get the raw-escaped basename of the path.
  2741. *
  2742. * @return the raw-escaped basename
  2743. */
  2744. public char[] getRawName() {
  2745. if (_path == null) {
  2746. return null;
  2747. }
  2748. int at = 0;
  2749. for (int i = _path.length - 1; i >= 0; i--) {
  2750. if (_path[i] == '/') {
  2751. at = i + 1;
  2752. break;
  2753. }
  2754. }
  2755. int len = _path.length - at;
  2756. char[] basename = new char[len];
  2757. System.arraycopy(_path, at, basename, 0, len);
  2758. return basename;
  2759. }
  2760. /**
  2761. * Get the escaped basename of the path.
  2762. *
  2763. * @return the escaped basename string
  2764. */
  2765. public String getEscapedName() {
  2766. char[] basename = getRawName();
  2767. return (basename == null) ? null : new String(basename);
  2768. }
  2769. /**
  2770. * Get the basename of the path.
  2771. *
  2772. * @return the basename string
  2773. * @throws URIException incomplete trailing escape pattern or unsupported
  2774. * character encoding
  2775. * @see #decode
  2776. */
  2777. public String getName() throws URIException {
  2778. char[] basename = getRawName();
  2779. return (basename == null) ? null : decode(getRawName(),
  2780. getProtocolCharset());
  2781. }
  2782. // ----------------------------------------------------- The path and query
  2783. /**
  2784. * Get the raw-escaped path and query.
  2785. *
  2786. * @return the raw-escaped path and query
  2787. */
  2788. public char[] getRawPathQuery() {
  2789. if (_path == null && _query == null) {
  2790. return null;
  2791. }
  2792. StringBuffer buff = new StringBuffer();
  2793. if (_path != null) {
  2794. buff.append(_path);
  2795. }
  2796. if (_query != null) {
  2797. buff.append('?');
  2798. buff.append(_query);
  2799. }
  2800. return buff.toString().toCharArray();
  2801. }
  2802. /**
  2803. * Get the escaped query.
  2804. *
  2805. * @return the escaped path and query string
  2806. */
  2807. public String getEscapedPathQuery() {
  2808. char[] rawPathQuery = getRawPathQuery();
  2809. return (rawPathQuery == null) ? null : new String(rawPathQuery);
  2810. }
  2811. /**
  2812. * Get the path and query.
  2813. *
  2814. * @return the path and query string.
  2815. * @throws URIException incomplete trailing escape pattern or unsupported
  2816. * character encoding
  2817. * @see #decode
  2818. */
  2819. public String getPathQuery() throws URIException {
  2820. char[] rawPathQuery = getRawPathQuery();
  2821. return (rawPathQuery == null) ? null : decode(rawPathQuery,
  2822. getProtocolCharset());
  2823. }
  2824. // -------------------------------------------------------------- The query
  2825. /**
  2826. * Set the raw-escaped query.
  2827. *
  2828. * @param escapedQuery the raw-escaped query
  2829. * @throws URIException escaped query not valid
  2830. */
  2831. public void setRawQuery(char[] escapedQuery) throws URIException {
  2832. if (escapedQuery == null || escapedQuery.length == 0) {
  2833. _query = escapedQuery;
  2834. setURI();
  2835. return;
  2836. }
  2837. // remove the fragment identifier
  2838. escapedQuery = removeFragmentIdentifier(escapedQuery);
  2839. if (!validate(escapedQuery, query)) {
  2840. throw new URIException(URIException.ESCAPING,
  2841. "escaped query not valid");
  2842. }
  2843. _query = escapedQuery;
  2844. setURI();
  2845. }
  2846. /**
  2847. * Set the escaped query string.
  2848. *
  2849. * @param escapedQuery the escaped query string
  2850. * @throws URIException escaped query not valid
  2851. */
  2852. public void setEscapedQuery(String escapedQuery) throws URIException {
  2853. if (escapedQuery == null) {
  2854. _query = null;
  2855. setURI();
  2856. return;
  2857. }
  2858. setRawQuery(escapedQuery.toCharArray());
  2859. }
  2860. /**
  2861. * Set the query.
  2862. * <p>
  2863. * When a query string is not misunderstood the reserved special characters
  2864. * ("&", "=", "+", ",", and "$") within a query component, it is
  2865. * recommended to use in encoding the whole query with this method.
  2866. * <p>
  2867. * The additional APIs for the special purpose using by the reserved
  2868. * special characters used in each protocol are implemented in each protocol
  2869. * classes inherited from <code>URI</code>. So refer to the same-named APIs
  2870. * implemented in each specific protocol instance.
  2871. *
  2872. * @param query the query string.
  2873. * @throws URIException incomplete trailing escape pattern or unsupported
  2874. * character encoding
  2875. * @see #encode
  2876. */
  2877. public void setQuery(String query) throws URIException {
  2878. if (query == null || query.length() == 0) {
  2879. _query = (query == null) ? null : query.toCharArray();
  2880. setURI();
  2881. return;
  2882. }
  2883. setRawQuery(encode(query, allowed_query, getProtocolCharset()));
  2884. }
  2885. /**
  2886. * Get the raw-escaped query.
  2887. *
  2888. * @return the raw-escaped query
  2889. */
  2890. public char[] getRawQuery() {
  2891. return _query;
  2892. }
  2893. /**
  2894. * Get the escaped query.
  2895. *
  2896. * @return the escaped query string
  2897. */
  2898. public String getEscapedQuery() {
  2899. return (_query == null) ? null : new String(_query);
  2900. }
  2901. /**
  2902. * Get the query.
  2903. *
  2904. * @return the query string.
  2905. * @throws URIException incomplete trailing escape pattern or unsupported
  2906. * character encoding
  2907. * @see #decode
  2908. */
  2909. public String getQuery() throws URIException {
  2910. return (_query == null) ? null : decode(_query, getProtocolCharset());
  2911. }
  2912. // ----------------------------------------------------------- The fragment
  2913. /**
  2914. * Set the raw-escaped fragment.
  2915. *
  2916. * @param escapedFragment the raw-escaped fragment
  2917. * @throws URIException escaped fragment not valid
  2918. */
  2919. public void setRawFragment(char[] escapedFragment) throws URIException {
  2920. if (escapedFragment == null || escapedFragment.length == 0) {
  2921. _fragment = escapedFragment;
  2922. hash = 0;
  2923. return;
  2924. }
  2925. if (!validate(escapedFragment, fragment)) {
  2926. throw new URIException(URIException.ESCAPING,
  2927. "escaped fragment not valid");
  2928. }
  2929. _fragment = escapedFragment;
  2930. hash = 0;
  2931. }
  2932. /**
  2933. * Set the escaped fragment string.
  2934. *
  2935. * @param escapedFragment the escaped fragment string
  2936. * @throws URIException escaped fragment not valid
  2937. */
  2938. public void setEscapedFragment(String escapedFragment) throws URIException {
  2939. if (escapedFragment == null) {
  2940. _fragment = null;
  2941. hash = 0;
  2942. return;
  2943. }
  2944. setRawFragment(escapedFragment.toCharArray());
  2945. }
  2946. /**
  2947. * Set the fragment.
  2948. *
  2949. * @param fragment the fragment string.
  2950. * @throws URIException If an error occurs.
  2951. */
  2952. public void setFragment(String fragment) throws URIException {
  2953. if (fragment == null || fragment.length() == 0) {
  2954. _fragment = (fragment == null) ? null : fragment.toCharArray();
  2955. hash = 0;
  2956. return;
  2957. }
  2958. _fragment = encode(fragment, allowed_fragment, getProtocolCharset());
  2959. hash = 0;
  2960. }
  2961. /**
  2962. * Get the raw-escaped fragment.
  2963. * <p>
  2964. * The optional fragment identifier is not part of a URI, but is often used
  2965. * in conjunction with a URI.
  2966. * <p>
  2967. * The format and interpretation of fragment identifiers is dependent on
  2968. * the media type [RFC2046] of the retrieval result.
  2969. * <p>
  2970. * A fragment identifier is only meaningful when a URI reference is
  2971. * intended for retrieval and the result of that retrieval is a document
  2972. * for which the identified fragment is consistently defined.
  2973. *
  2974. * @return the raw-escaped fragment
  2975. */
  2976. public char[] getRawFragment() {
  2977. return _fragment;
  2978. }
  2979. /**
  2980. * Get the escaped fragment.
  2981. *
  2982. * @return the escaped fragment string
  2983. */
  2984. public String getEscapedFragment() {
  2985. return (_fragment == null) ? null : new String(_fragment);
  2986. }
  2987. /**
  2988. * Get the fragment.
  2989. *
  2990. * @return the fragment string
  2991. * @throws URIException incomplete trailing escape pattern or unsupported
  2992. * character encoding
  2993. * @see #decode
  2994. */
  2995. public String getFragment() throws URIException {
  2996. return (_fragment == null) ? null : decode(_fragment,
  2997. getProtocolCharset());
  2998. }
  2999. // ------------------------------------------------------------- Utilities
  3000. /**
  3001. * Remove the fragment identifier of the given component.
  3002. *
  3003. * @param component the component that a fragment may be included
  3004. * @return the component that the fragment identifier is removed
  3005. */
  3006. protected char[] removeFragmentIdentifier(char[] component) {
  3007. if (component == null) {
  3008. return null;
  3009. }
  3010. int lastIndex = new String(component).indexOf('#');
  3011. if (lastIndex != -1) {
  3012. component = new String(component).substring(0,
  3013. lastIndex).toCharArray();
  3014. }
  3015. return component;
  3016. }
  /**
   * Normalize the given hier path part by removing "." and ".." segments.
   *
   * <p>Algorithm taken from URI reference parser at
   * http://www.apache.org/~fielding/uri/rev-2002/issues.html.
   *
   * <p>The steps are applied in this order, each on the result of the
   * previous one:
   * <ol>
   * <li>a leading "." or ".." is dropped;</li>
   * <li>every "/./" is collapsed to "/" and a trailing "/." loses its
   * ".";</li>
   * <li>every "/&lt;segment&gt;/../" that has a preceding segment is
   * collapsed, and a trailing "/&lt;segment&gt;/.." is cut back to the
   * slash in front of the segment;</li>
   * <li>any remaining "/../" that has no segment in front of it is dropped
   * together with everything before it, and a remaining trailing "/.."
   * without a preceding slash turns the whole path into "/".</li>
   * </ol>
   *
   * <p>Note: the body shown here contains no <code>throw</code> statement;
   * <code>URIException</code> is only declared in the signature.
   *
   * @param path the path to normalize
   * @return the normalized path as a new array, or <code>null</code> when
   * <code>path</code> is <code>null</code>
   * @throws URIException no more higher path level to be normalized
   */
  protected char[] normalize(char[] path) throws URIException {
      if (path == null) {
          return null;
      }
      String normalized = new String(path);
      // If the buffer begins with "./" or "../", the "." or ".." is removed
      // (the slash that followed it is kept as the new first character).
      if (normalized.startsWith("./")) {
          normalized = normalized.substring(1);
      } else if (normalized.startsWith("../")) {
          normalized = normalized.substring(2);
      } else if (normalized.startsWith("..")) {
          // NOTE(review): this branch matches any path starting with "..",
          // including names such as "..foo", not only a bare ".." -- confirm
          // that stripping the two dots there is intended.
          normalized = normalized.substring(2);
      }
      // All occurrences of "/./" in the buffer are replaced with "/"
      int index = -1;
      while ((index = normalized.indexOf("/./")) != -1) {
          normalized = normalized.substring(0, index) + normalized.substring(index + 2);
      }
      // If the buffer ends with "/.", the "." is removed.
      if (normalized.endsWith("/.")) {
          normalized = normalized.substring(0, normalized.length() - 1);
      }
      // Search offset for the loop below; it is advanced past occurrences
      // of "/../" that cannot be collapsed so the loop terminates.
      int startIndex = 0;
      // All occurrences of "/<segment>/../" in the buffer, where ".."
      // and <segment> are complete path segments, are iteratively replaced
      // with "/" in order from left to right until no matching pattern remains.
      // If the buffer ends with "/<segment>/..", that is also replaced
      // with "/". Note that <segment> may be empty.
      while ((index = normalized.indexOf("/../", startIndex)) != -1) {
          // Slash that opens the segment in front of "/../", or -1 if none.
          int slashIndex = normalized.lastIndexOf('/', index - 1);
          if (slashIndex >= 0) {
              // Drop "/<segment>/.." and keep the slash that followed "..".
              normalized = normalized.substring(0, slashIndex) + normalized.substring(index + 3);
          } else {
              // Nothing to climb out of here; skip this occurrence.
              startIndex = index + 3;
          }
      }
      if (normalized.endsWith("/..")) {
          // length() - 4 is the last character of the segment before "/..".
          int slashIndex = normalized.lastIndexOf('/', normalized.length() - 4);
          if (slashIndex >= 0) {
              normalized = normalized.substring(0, slashIndex + 1);
          }
      }
      // Any "/../" still present has no slash in front of its preceding
      // segment. Everything up to and including its ".." is dropped, so the
      // buffer continues from the slash that followed; this repeats until
      // no such prefix remains.
      while ((index = normalized.indexOf("/../")) != -1) {
          int slashIndex = normalized.lastIndexOf('/', index - 1);
          if (slashIndex >= 0) {
              break;
          } else {
              normalized = normalized.substring(index + 3);
          }
      }
      // A trailing "/.." with no slash in front of its preceding segment
      // (for example the buffer "/..") collapses the whole path to "/".
      if (normalized.endsWith("/..")) {
          int slashIndex = normalized.lastIndexOf('/', normalized.length() - 4);
          if (slashIndex < 0) {
              normalized = "/";
          }
      }
      return normalized.toCharArray();
  }
  3090. /**
  3091. * Normalizes the path part of this URI. Normalization is only meant to be performed on
  3092. * URIs with an absolute path. Calling this method on a relative path URI will have no
  3093. * effect.
  3094. *
  3095. * @throws URIException no more higher path level to be normalized
  3096. *
  3097. * @see #isAbsPath()
  3098. */
  3099. public void normalize() throws URIException {
  3100. if (isAbsPath()) {
  3101. _path = normalize(_path);
  3102. setURI();
  3103. }
  3104. }
  3105. /**
  3106. * Test if the first array is equal to the second array.
  3107. *
  3108. * @param first the first character array
  3109. * @param second the second character array
  3110. * @return true if they're equal
  3111. */
  3112. protected boolean equals(char[] first, char[] second) {
  3113. if (first == null && second == null) {
  3114. return true;
  3115. }
  3116. if (first == null || second == null) {
  3117. return false;
  3118. }
  3119. if (first.length != second.length) {
  3120. return false;
  3121. }
  3122. for (int i = 0; i < first.length; i++) {
  3123. if (first[i] != second[i]) {
  3124. return false;
  3125. }
  3126. }
  3127. return true;
  3128. }
  3129. /**
  3130. * Test an object if this URI is equal to another.
  3131. *
  3132. * @param obj an object to compare
  3133. * @return true if two URI objects are equal
  3134. */
  3135. public boolean equals(Object obj) {
  3136. // normalize and test each components
  3137. if (obj == this) {
  3138. return true;
  3139. }
  3140. if (!(obj instanceof URI)) {
  3141. return false;
  3142. }
  3143. URI another = (URI) obj;
  3144. // scheme
  3145. if (!equals(_scheme, another._scheme)) {
  3146. return false;
  3147. }
  3148. // is_opaque_part or is_hier_part? and opaque
  3149. if (!equals(_opaque, another._opaque)) {
  3150. return false;
  3151. }
  3152. // is_hier_part
  3153. // has_authority
  3154. if (!equals(_authority, another._authority)) {
  3155. return false;
  3156. }
  3157. // path
  3158. if (!equals(_path, another._path)) {
  3159. return false;
  3160. }
  3161. // has_query
  3162. if (!equals(_query, another._query)) {
  3163. return false;
  3164. }
  3165. // has_fragment? should be careful of the only fragment case.
  3166. if (!equals(_fragment, another._fragment)) {
  3167. return false;
  3168. }
  3169. return true;
  3170. }
  3171. // ---------------------------------------------------------- Serialization
  3172. /**
  3173. * Write the content of this URI.
  3174. *
  3175. * @param oos the object-output stream
  3176. * @throws IOException If an IO problem occurs.
  3177. */
  3178. protected void writeObject(ObjectOutputStream oos)
  3179. throws IOException {
  3180. oos.defaultWriteObject();
  3181. }
  3182. /**
  3183. * Read a URI.
  3184. *
  3185. * @param ois the object-input stream
  3186. * @throws ClassNotFoundException If one of the classes specified in the
  3187. * input stream cannot be found.
  3188. * @throws IOException If an IO problem occurs.
  3189. */
  3190. protected void readObject(ObjectInputStream ois)
  3191. throws ClassNotFoundException, IOException {
  3192. ois.defaultReadObject();
  3193. }
  3194. // -------------------------------------------------------------- Hash code
  3195. /**
  3196. * Return a hash code for this URI.
  3197. *
  3198. * @return a has code value for this URI
  3199. */
  3200. public int hashCode() {
  3201. if (hash == 0) {
  3202. char[] c = _uri;
  3203. if (c != null) {
  3204. for (int i = 0, len = c.length; i < len; i++) {
  3205. hash = 31 * hash + c[i];
  3206. }
  3207. }
  3208. c = _fragment;
  3209. if (c != null) {
  3210. for (int i = 0, len = c.length; i < len; i++) {
  3211. hash = 31 * hash + c[i];
  3212. }
  3213. }
  3214. }
  3215. return hash;
  3216. }
  3217. // ------------------------------------------------------------- Comparison
  3218. /**
  3219. * Compare this URI to another object.
  3220. *
  3221. * @param obj the object to be compared.
  3222. * @return 0, if it's same,
  3223. * -1, if failed, first being compared with in the authority component
  3224. * @throws ClassCastException not URI argument
  3225. */
  3226. public int compareTo(Object obj) throws ClassCastException {
  3227. URI another = (URI) obj;
  3228. if (!equals(_authority, another.getRawAuthority())) {
  3229. return -1;
  3230. }
  3231. return toString().compareTo(another.toString());
  3232. }
  3233. // ------------------------------------------------------------------ Clone
  3234. /**
  3235. * Create and return a copy of this object, the URI-reference containing
  3236. * the userinfo component. Notice that the whole URI-reference including
  3237. * the userinfo component counld not be gotten as a <code>String</code>.
  3238. * <p>
  3239. * To copy the identical <code>URI</code> object including the userinfo
  3240. * component, it should be used.
  3241. *
  3242. * @return a clone of this instance
  3243. */
  3244. public synchronized Object clone() {
  3245. URI instance = new URI();
  3246. instance._uri = _uri;
  3247. instance._scheme = _scheme;
  3248. instance._opaque = _opaque;
  3249. instance._authority = _authority;
  3250. instance._userinfo = _userinfo;
  3251. instance._host = _host;
  3252. instance._port = _port;
  3253. instance._path = _path;
  3254. instance._query = _query;
  3255. instance._fragment = _fragment;
  3256. // the charset to do escape encoding for this instance
  3257. instance.protocolCharset = protocolCharset;
  3258. // flags
  3259. instance._is_hier_part = _is_hier_part;
  3260. instance._is_opaque_part = _is_opaque_part;
  3261. instance._is_net_path = _is_net_path;
  3262. instance._is_abs_path = _is_abs_path;
  3263. instance._is_rel_path = _is_rel_path;
  3264. instance._is_reg_name = _is_reg_name;
  3265. instance._is_server = _is_server;
  3266. instance._is_hostname = _is_hostname;
  3267. instance._is_IPv4address = _is_IPv4address;
  3268. instance._is_IPv6reference = _is_IPv6reference;
  3269. return instance;
  3270. }
  3271. // ------------------------------------------------------------ Get the URI
  3272. /**
  3273. * It can be gotten the URI character sequence. It's raw-escaped.
  3274. * For the purpose of the protocol to be transported, it will be useful.
  3275. * <p>
  3276. * It is clearly unwise to use a URL that contains a password which is
  3277. * intended to be secret. In particular, the use of a password within
  3278. * the 'userinfo' component of a URL is strongly disrecommended except
  3279. * in those rare cases where the 'password' parameter is intended to be
  3280. * public.
  3281. * <p>
  3282. * When you want to get each part of the userinfo, you need to use the
  3283. * specific methods in the specific URL. It depends on the specific URL.
  3284. *
  3285. * @return the URI character sequence
  3286. */
  3287. public char[] getRawURI() {
  3288. return _uri;
  3289. }
  3290. /**
  3291. * It can be gotten the URI character sequence. It's escaped.
  3292. * For the purpose of the protocol to be transported, it will be useful.
  3293. *
  3294. * @return the escaped URI string
  3295. */
  3296. public String getEscapedURI() {
  3297. return (_uri == null) ? null : new String(_uri);
  3298. }
  3299. /**
  3300. * It can be gotten the URI character sequence.
  3301. *
  3302. * @return the original URI string
  3303. * @throws URIException incomplete trailing escape pattern or unsupported
  3304. * character encoding
  3305. * @see #decode
  3306. */
  3307. public String getURI() throws URIException {
  3308. return (_uri == null) ? null : decode(_uri, getProtocolCharset());
  3309. }
  3310. /**
  3311. * Get the URI reference character sequence.
  3312. *
  3313. * @return the URI reference character sequence
  3314. */
  3315. public char[] getRawURIReference() {
  3316. if (_fragment == null) {
  3317. return _uri;
  3318. }
  3319. if (_uri == null) {
  3320. return _fragment;
  3321. }
  3322. // if _uri != null && _fragment != null
  3323. String uriReference = new String(_uri) + "#" + new String(_fragment);
  3324. return uriReference.toCharArray();
  3325. }
  3326. /**
  3327. * Get the escaped URI reference string.
  3328. *
  3329. * @return the escaped URI reference string
  3330. */
  3331. public String getEscapedURIReference() {
  3332. char[] uriReference = getRawURIReference();
  3333. return (uriReference == null) ? null : new String(uriReference);
  3334. }
  3335. /**
  3336. * Get the original URI reference string.
  3337. *
  3338. * @return the original URI reference string
  3339. * @throws URIException If {@link #decode} fails.
  3340. */
  3341. public String getURIReference() throws URIException {
  3342. char[] uriReference = getRawURIReference();
  3343. return (uriReference == null) ? null : decode(uriReference,
  3344. getProtocolCharset());
  3345. }
  3346. /**
  3347. * Get the escaped URI string.
  3348. * <p>
  3349. * On the document, the URI-reference form is only used without the userinfo
  3350. * component like http://jakarta.apache.org/ by the security reason.
  3351. * But the URI-reference form with the userinfo component could be parsed.
  3352. * <p>
  3353. * In other words, this URI and any its subclasses must not expose the
  3354. * URI-reference expression with the userinfo component like
  3355. * http://user:password@hostport/restricted_zone.<br>
  3356. * It means that the API client programmer should extract each user and
  3357. * password to access manually. Probably it will be supported in the each
  3358. * subclass, however, not a whole URI-reference expression.
  3359. *
  3360. * @return the escaped URI string
  3361. * @see #clone()
  3362. */
  3363. public String toString() {
  3364. return getEscapedURI();
  3365. }
  3366. // ------------------------------------------------------------ Inner class
  3367. /**
  3368. * The charset-changed normal operation to represent to be required to
  3369. * alert to user the fact the default charset is changed.
  3370. */
  3371. public static class DefaultCharsetChanged extends RuntimeException {
  3372. // ------------------------------------------------------- constructors
  3373. /**
  3374. * The constructor with a reason string and its code arguments.
  3375. *
  3376. * @param reasonCode the reason code
  3377. * @param reason the reason
  3378. */
  3379. public DefaultCharsetChanged(int reasonCode, String reason) {
  3380. super(reason);
  3381. this.reason = reason;
  3382. this.reasonCode = reasonCode;
  3383. }
  3384. // ---------------------------------------------------------- constants
  3385. /** No specified reason code. */
  3386. public static final int UNKNOWN = 0;
  3387. /** Protocol charset changed. */
  3388. public static final int PROTOCOL_CHARSET = 1;
  3389. /** Document charset changed. */
  3390. public static final int DOCUMENT_CHARSET = 2;
  3391. // ------------------------------------------------- instance variables
  3392. /** The reason code. */
  3393. private int reasonCode;
  3394. /** The reason message. */
  3395. private String reason;
  3396. // ------------------------------------------------------------ methods
  3397. /**
  3398. * Get the reason code.
  3399. *
  3400. * @return the reason code
  3401. */
  3402. public int getReasonCode() {
  3403. return reasonCode;
  3404. }
  3405. /**
  3406. * Get the reason message.
  3407. *
  3408. * @return the reason message
  3409. */
  3410. public String getReason() {
  3411. return reason;
  3412. }
  3413. }
  3414. /**
  3415. * A mapping to determine the (somewhat arbitrarily) preferred charset for a
  3416. * given locale. Supports all locales recognized in JDK 1.1.
  3417. * <p>
  3418. * The distribution of this class is Servlets.com. It was originally
  3419. * written by Jason Hunter [jhunter at acm.org] and used by with permission.
  3420. */
  3421. public static class LocaleToCharsetMap {
  3422. /** A mapping of language code to charset */
  3423. private static final Hashtable LOCALE_TO_CHARSET_MAP;
  3424. static {
  3425. LOCALE_TO_CHARSET_MAP = new Hashtable();
  3426. LOCALE_TO_CHARSET_MAP.put("ar", "ISO-8859-6");
  3427. LOCALE_TO_CHARSET_MAP.put("be", "ISO-8859-5");
  3428. LOCALE_TO_CHARSET_MAP.put("bg", "ISO-8859-5");
  3429. LOCALE_TO_CHARSET_MAP.put("ca", "ISO-8859-1");
  3430. LOCALE_TO_CHARSET_MAP.put("cs", "ISO-8859-2");
  3431. LOCALE_TO_CHARSET_MAP.put("da", "ISO-8859-1");
  3432. LOCALE_TO_CHARSET_MAP.put("de", "ISO-8859-1");
  3433. LOCALE_TO_CHARSET_MAP.put("el", "ISO-8859-7");
  3434. LOCALE_TO_CHARSET_MAP.put("en", "ISO-8859-1");
  3435. LOCALE_TO_CHARSET_MAP.put("es", "ISO-8859-1");
  3436. LOCALE_TO_CHARSET_MAP.put("et", "ISO-8859-1");
  3437. LOCALE_TO_CHARSET_MAP.put("fi", "ISO-8859-1");
  3438. LOCALE_TO_CHARSET_MAP.put("fr", "ISO-8859-1");
  3439. LOCALE_TO_CHARSET_MAP.put("hr", "ISO-8859-2");
  3440. LOCALE_TO_CHARSET_MAP.put("hu", "ISO-8859-2");
  3441. LOCALE_TO_CHARSET_MAP.put("is", "ISO-8859-1");
  3442. LOCALE_TO_CHARSET_MAP.put("it", "ISO-8859-1");
  3443. LOCALE_TO_CHARSET_MAP.put("iw", "ISO-8859-8");
  3444. LOCALE_TO_CHARSET_MAP.put("ja", "Shift_JIS");
  3445. LOCALE_TO_CHARSET_MAP.put("ko", "EUC-KR");
  3446. LOCALE_TO_CHARSET_MAP.put("lt", "ISO-8859-2");
  3447. LOCALE_TO_CHARSET_MAP.put("lv", "ISO-8859-2");
  3448. LOCALE_TO_CHARSET_MAP.put("mk", "ISO-8859-5");
  3449. LOCALE_TO_CHARSET_MAP.put("nl", "ISO-8859-1");
  3450. LOCALE_TO_CHARSET_MAP.put("no", "ISO-8859-1");
  3451. LOCALE_TO_CHARSET_MAP.put("pl", "ISO-8859-2");
  3452. LOCALE_TO_CHARSET_MAP.put("pt", "ISO-8859-1");
  3453. LOCALE_TO_CHARSET_MAP.put("ro", "ISO-8859-2");
  3454. LOCALE_TO_CHARSET_MAP.put("ru", "ISO-8859-5");
  3455. LOCALE_TO_CHARSET_MAP.put("sh", "ISO-8859-5");
  3456. LOCALE_TO_CHARSET_MAP.put("sk", "ISO-8859-2");
  3457. LOCALE_TO_CHARSET_MAP.put("sl", "ISO-8859-2");
  3458. LOCALE_TO_CHARSET_MAP.put("sq", "ISO-8859-2");
  3459. LOCALE_TO_CHARSET_MAP.put("sr", "ISO-8859-5");
  3460. LOCALE_TO_CHARSET_MAP.put("sv", "ISO-8859-1");
  3461. LOCALE_TO_CHARSET_MAP.put("tr", "ISO-8859-9");
  3462. LOCALE_TO_CHARSET_MAP.put("uk", "ISO-8859-5");
  3463. LOCALE_TO_CHARSET_MAP.put("zh", "GB2312");
  3464. LOCALE_TO_CHARSET_MAP.put("zh_TW", "Big5");
  3465. }
  3466. /**
  3467. * Get the preferred charset for the given locale.
  3468. *
  3469. * @param locale the locale
  3470. * @return the preferred charset or null if the locale is not
  3471. * recognized.
  3472. */
  3473. public static String getCharset(Locale locale) {
  3474. // try for an full name match (may include country)
  3475. String charset =
  3476. (String) LOCALE_TO_CHARSET_MAP.get(locale.toString());
  3477. if (charset != null) {
  3478. return charset;
  3479. }
  3480. // if a full name didn't match, try just the language
  3481. charset = (String) LOCALE_TO_CHARSET_MAP.get(locale.getLanguage());
  3482. return charset; // may be null
  3483. }
  3484. }
  3485. }