1. /*
  2. * $Header: /home/cvs/jakarta-commons/httpclient/src/java/org/apache/commons/httpclient/util/URIUtil.java,v 1.27 2004/05/05 20:34:01 olegk Exp $
  3. * $Revision: 1.27 $
  4. * $Date: 2004/05/05 20:34:01 $
  5. *
  6. * ====================================================================
  7. *
  8. * Copyright 2002-2004 The Apache Software Foundation
  9. *
  10. * Licensed under the Apache License, Version 2.0 (the "License");
  11. * you may not use this file except in compliance with the License.
  12. * You may obtain a copy of the License at
  13. *
  14. * http://www.apache.org/licenses/LICENSE-2.0
  15. *
  16. * Unless required by applicable law or agreed to in writing, software
  17. * distributed under the License is distributed on an "AS IS" BASIS,
  18. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  19. * See the License for the specific language governing permissions and
  20. * limitations under the License.
  21. * ====================================================================
  22. *
  23. * This software consists of voluntary contributions made by many
  24. * individuals on behalf of the Apache Software Foundation. For more
  25. * information on the Apache Software Foundation, please see
  26. * <http://www.apache.org/>.
  27. *
  28. */
  29. package org.apache.commons.httpclient.util;
  30. import java.util.BitSet;
  31. import org.apache.commons.codec.DecoderException;
  32. import org.apache.commons.codec.net.URLCodec;
  33. import org.apache.commons.httpclient.URI;
  34. import org.apache.commons.httpclient.URIException;
  35. /**
  36. * The URI escape and character encoding and decoding utility.
  37. * It's compatible with {@link org.apache.commons.httpclient.HttpURL} rather
  38. * than {@link org.apache.commons.httpclient.URI}.
  39. *
  40. * @author <a href="mailto:jericho@apache.org">Sung-Gu</a>
  41. * @version $Revision: 1.27 $ $Date: 2002/03/14 15:14:01
  42. */
  43. public class URIUtil {
  // -------------------------------------------------------- Class variables
  /**
   * A bit set with no bit set. {@link #encodeAll(String, String)} passes it as the
   * "allowed" set so that no character is exempt from escaping.
   */
  protected static final BitSet empty = new BitSet(1);
  46. // ---------------------------------------------------------- URI utilities
  47. /**
  48. * Get the basename of an URI. It's possibly an empty string.
  49. *
  50. * @param uri a string regarded an URI
  51. * @return the basename string; an empty string if the path ends with slash
  52. */
  53. public static String getName(String uri) {
  54. if (uri == null || uri.length() == 0) { return uri; }
  55. String path = URIUtil.getPath(uri);
  56. int at = path.lastIndexOf("/");
  57. int to = path.length();
  58. return (at >= 0) ? path.substring(at + 1, to) : path;
  59. }
  60. /**
  61. * Get the query of an URI.
  62. *
  63. * @param uri a string regarded an URI
  64. * @return the query string; <code>null</code> if empty or undefined
  65. */
  66. public static String getQuery(String uri) {
  67. if (uri == null || uri.length() == 0) { return null; }
  68. // consider of net_path
  69. int at = uri.indexOf("//");
  70. int from = uri.indexOf(
  71. "/",
  72. at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0
  73. );
  74. // the authority part of URI ignored
  75. int to = uri.length();
  76. // reuse the at and from variables to consider the query
  77. at = uri.indexOf("?", from);
  78. if (at >= 0) {
  79. from = at + 1;
  80. } else {
  81. return null;
  82. }
  83. // check the fragment
  84. if (uri.lastIndexOf("#") > from) {
  85. to = uri.lastIndexOf("#");
  86. }
  87. // get the path and query.
  88. return (from < 0 || from == to) ? null : uri.substring(from, to);
  89. }
  90. /**
  91. * Get the path of an URI.
  92. *
  93. * @param uri a string regarded an URI
  94. * @return the path string
  95. */
  96. public static String getPath(String uri) {
  97. if (uri == null) {
  98. return null;
  99. }
  100. // consider of net_path
  101. int at = uri.indexOf("//");
  102. int from = uri.indexOf(
  103. "/",
  104. at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0
  105. );
  106. // the authority part of URI ignored
  107. int to = uri.length();
  108. // check the query
  109. if (uri.indexOf('?', from) != -1) {
  110. to = uri.indexOf('?', from);
  111. }
  112. // check the fragment
  113. if (uri.lastIndexOf("#") > from && uri.lastIndexOf("#") < to) {
  114. to = uri.lastIndexOf("#");
  115. }
  116. // get only the path.
  117. return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from, to);
  118. }
  119. /**
  120. * Get the path and query of an URI.
  121. *
  122. * @param uri a string regarded an URI
  123. * @return the path and query string
  124. */
  125. public static String getPathQuery(String uri) {
  126. if (uri == null) {
  127. return null;
  128. }
  129. // consider of net_path
  130. int at = uri.indexOf("//");
  131. int from = uri.indexOf(
  132. "/",
  133. at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0
  134. );
  135. // the authority part of URI ignored
  136. int to = uri.length();
  137. // Ignore the '?' mark so to ignore the query.
  138. // check the fragment
  139. if (uri.lastIndexOf("#") > from) {
  140. to = uri.lastIndexOf("#");
  141. }
  142. // get the path and query.
  143. return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from, to);
  144. }
  145. /**
  146. * Get the path of an URI and its rest part.
  147. *
  148. * @param uri a string regarded an URI
  149. * @return the string from the path part
  150. */
  151. public static String getFromPath(String uri) {
  152. if (uri == null) {
  153. return null;
  154. }
  155. // consider of net_path
  156. int at = uri.indexOf("//");
  157. int from = uri.indexOf(
  158. "/",
  159. at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0
  160. );
  161. // get the path and its rest.
  162. return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from);
  163. }
  164. // ----------------------------------------------------- Encoding utilities
  165. /**
  166. * Get the all escaped and encoded string with the default protocl charset.
  167. * It's the same function to use <code>encode(String unescaped, Bitset
  168. * empty, URI.getDefaultProtocolCharset())</code>.
  169. *
  170. * @param unescaped an unescaped string
  171. * @return the escaped string
  172. *
  173. * @throws URIException if the default protocol charset is not supported
  174. *
  175. * @see URI#getDefaultProtocolCharset
  176. * @see #encode
  177. */
  178. public static String encodeAll(String unescaped) throws URIException {
  179. return encodeAll(unescaped, URI.getDefaultProtocolCharset());
  180. }
  181. /**
  182. * Get the all escaped and encoded string with a given charset.
  183. * It's the same function to use <code>encode(String unescaped, Bitset
  184. * empty, String charset)</code>.
  185. *
  186. * @param unescaped an unescaped string
  187. * @param charset the charset
  188. * @return the escaped string
  189. *
  190. * @throws URIException if the charset is not supported
  191. *
  192. * @see #encode
  193. */
  194. public static String encodeAll(String unescaped, String charset)
  195. throws URIException {
  196. return encode(unescaped, empty, charset);
  197. }
  198. /**
  199. * Escape and encode a string regarded as within the authority component of
  200. * an URI with the default protocol charset.
  201. * Within the authority component, the characters ";", ":", "@", "?", and
  202. * "/" are reserved.
  203. *
  204. * @param unescaped an unescaped string
  205. * @return the escaped string
  206. *
  207. * @throws URIException if the default protocol charset is not supported
  208. *
  209. * @see URI#getDefaultProtocolCharset
  210. * @see #encode
  211. */
  212. public static String encodeWithinAuthority(String unescaped)
  213. throws URIException {
  214. return encodeWithinAuthority(unescaped, URI.getDefaultProtocolCharset());
  215. }
  216. /**
  217. * Escape and encode a string regarded as within the authority component of
  218. * an URI with a given charset.
  219. * Within the authority component, the characters ";", ":", "@", "?", and
  220. * "/" are reserved.
  221. *
  222. * @param unescaped an unescaped string
  223. * @param charset the charset
  224. * @return the escaped string
  225. *
  226. * @throws URIException if the charset is not supported
  227. *
  228. * @see #encode
  229. */
  230. public static String encodeWithinAuthority(String unescaped, String charset)
  231. throws URIException {
  232. return encode(unescaped, URI.allowed_within_authority, charset);
  233. }
  234. /**
  235. * Escape and encode a string regarded as the path and query components of
  236. * an URI with the default protocol charset.
  237. *
  238. * @param unescaped an unescaped string
  239. * @return the escaped string
  240. *
  241. * @throws URIException if the default protocol charset is not supported
  242. *
  243. * @see URI#getDefaultProtocolCharset
  244. * @see #encode
  245. */
  246. public static String encodePathQuery(String unescaped) throws URIException {
  247. return encodePathQuery(unescaped, URI.getDefaultProtocolCharset());
  248. }
  249. /**
  250. * Escape and encode a string regarded as the path and query components of
  251. * an URI with a given charset.
  252. *
  253. * @param unescaped an unescaped string
  254. * @param charset the charset
  255. * @return the escaped string
  256. *
  257. * @throws URIException if the charset is not supported
  258. *
  259. * @see #encode
  260. */
  261. public static String encodePathQuery(String unescaped, String charset)
  262. throws URIException {
  263. int at = unescaped.indexOf('?');
  264. if (at < 0) {
  265. return encode(unescaped, URI.allowed_abs_path, charset);
  266. }
  267. // else
  268. return encode(unescaped.substring(0, at), URI.allowed_abs_path, charset)
  269. + '?' + encode(unescaped.substring(at + 1), URI.allowed_query, charset);
  270. }
  271. /**
  272. * Escape and encode a string regarded as within the path component of an
  273. * URI with the default protocol charset.
  274. * The path may consist of a sequence of path segments separated by a
  275. * single slash "/" character. Within a path segment, the characters
  276. * "/", ";", "=", and "?" are reserved.
  277. *
  278. * @param unescaped an unescaped string
  279. * @return the escaped string
  280. *
  281. * @throws URIException if the default protocol charset is not supported
  282. *
  283. * @see URI#getDefaultProtocolCharset
  284. * @see #encode
  285. */
  286. public static String encodeWithinPath(String unescaped)
  287. throws URIException {
  288. return encodeWithinPath(unescaped, URI.getDefaultProtocolCharset());
  289. }
  290. /**
  291. * Escape and encode a string regarded as within the path component of an
  292. * URI with a given charset.
  293. * The path may consist of a sequence of path segments separated by a
  294. * single slash "/" character. Within a path segment, the characters
  295. * "/", ";", "=", and "?" are reserved.
  296. *
  297. * @param unescaped an unescaped string
  298. * @param charset the charset
  299. * @return the escaped string
  300. *
  301. * @throws URIException if the charset is not supported
  302. *
  303. * @see #encode
  304. */
  305. public static String encodeWithinPath(String unescaped, String charset)
  306. throws URIException {
  307. return encode(unescaped, URI.allowed_within_path, charset);
  308. }
  309. /**
  310. * Escape and encode a string regarded as the path component of an URI with
  311. * the default protocol charset.
  312. *
  313. * @param unescaped an unescaped string
  314. * @return the escaped string
  315. *
  316. * @throws URIException if the default protocol charset is not supported
  317. *
  318. * @see URI#getDefaultProtocolCharset
  319. * @see #encode
  320. */
  321. public static String encodePath(String unescaped) throws URIException {
  322. return encodePath(unescaped, URI.getDefaultProtocolCharset());
  323. }
  324. /**
  325. * Escape and encode a string regarded as the path component of an URI with
  326. * a given charset.
  327. *
  328. * @param unescaped an unescaped string
  329. * @param charset the charset
  330. * @return the escaped string
  331. *
  332. * @throws URIException if the charset is not supported
  333. *
  334. * @see #encode
  335. */
  336. public static String encodePath(String unescaped, String charset)
  337. throws URIException {
  338. return encode(unescaped, URI.allowed_abs_path, charset);
  339. }
  340. /**
  341. * Escape and encode a string regarded as within the query component of an
  342. * URI with the default protocol charset.
  343. * When a query comprise the name and value pairs, it is used in order
  344. * to encode each name and value string. The reserved special characters
  345. * within a query component are being included in encoding the query.
  346. *
  347. * @param unescaped an unescaped string
  348. * @return the escaped string
  349. *
  350. * @throws URIException if the default protocol charset is not supported
  351. *
  352. * @see URI#getDefaultProtocolCharset
  353. * @see #encode
  354. */
  355. public static String encodeWithinQuery(String unescaped)
  356. throws URIException {
  357. return encodeWithinQuery(unescaped, URI.getDefaultProtocolCharset());
  358. }
  359. /**
  360. * Escape and encode a string regarded as within the query component of an
  361. * URI with a given charset.
  362. * When a query comprise the name and value pairs, it is used in order
  363. * to encode each name and value string. The reserved special characters
  364. * within a query component are being included in encoding the query.
  365. *
  366. * @param unescaped an unescaped string
  367. * @param charset the charset
  368. * @return the escaped string
  369. *
  370. * @throws URIException if the charset is not supported
  371. *
  372. * @see #encode
  373. */
  374. public static String encodeWithinQuery(String unescaped, String charset)
  375. throws URIException {
  376. return encode(unescaped, URI.allowed_within_query, charset);
  377. }
  378. /**
  379. * Escape and encode a string regarded as the query component of an URI with
  380. * the default protocol charset.
  381. * When a query string is not misunderstood the reserved special characters
  382. * ("&", "=", "+", ",", and "$") within a query component, this method
  383. * is recommended to use in encoding the whole query.
  384. *
  385. * @param unescaped an unescaped string
  386. * @return the escaped string
  387. *
  388. * @throws URIException if the default protocol charset is not supported
  389. *
  390. * @see URI#getDefaultProtocolCharset
  391. * @see #encode
  392. */
  393. public static String encodeQuery(String unescaped) throws URIException {
  394. return encodeQuery(unescaped, URI.getDefaultProtocolCharset());
  395. }
  396. /**
  397. * Escape and encode a string regarded as the query component of an URI with
  398. * a given charset.
  399. * When a query string is not misunderstood the reserved special characters
  400. * ("&", "=", "+", ",", and "$") within a query component, this method
  401. * is recommended to use in encoding the whole query.
  402. *
  403. * @param unescaped an unescaped string
  404. * @param charset the charset
  405. * @return the escaped string
  406. *
  407. * @throws URIException if the charset is not supported
  408. *
  409. * @see #encode
  410. */
  411. public static String encodeQuery(String unescaped, String charset)
  412. throws URIException {
  413. return encode(unescaped, URI.allowed_query, charset);
  414. }
  415. /**
  416. * Escape and encode a given string with allowed characters not to be
  417. * escaped and the default protocol charset.
  418. *
  419. * @param unescaped a string
  420. * @param allowed allowed characters not to be escaped
  421. * @return the escaped string
  422. *
  423. * @throws URIException if the default protocol charset is not supported
  424. *
  425. * @see URI#getDefaultProtocolCharset
  426. */
  427. public static String encode(String unescaped, BitSet allowed)
  428. throws URIException {
  429. return encode(unescaped, allowed, URI.getDefaultProtocolCharset());
  430. }
  431. /**
  432. * Escape and encode a given string with allowed characters not to be
  433. * escaped and a given charset.
  434. *
  435. * @param unescaped a string
  436. * @param allowed allowed characters not to be escaped
  437. * @param charset the charset
  438. * @return the escaped string
  439. */
  440. public static String encode(String unescaped, BitSet allowed,
  441. String charset) throws URIException {
  442. byte[] rawdata = URLCodec.encodeUrl(allowed,
  443. EncodingUtil.getBytes(unescaped, charset));
  444. return EncodingUtil.getAsciiString(rawdata);
  445. }
  446. /**
  447. * Unescape and decode a given string regarded as an escaped string with the
  448. * default protocol charset.
  449. *
  450. * @param escaped a string
  451. * @return the unescaped string
  452. *
  453. * @throws URIException if the string cannot be decoded (invalid)
  454. *
  455. * @see URI#getDefaultProtocolCharset
  456. */
  457. public static String decode(String escaped) throws URIException {
  458. try {
  459. byte[] rawdata = URLCodec.decodeUrl(EncodingUtil.getAsciiBytes(escaped));
  460. return EncodingUtil.getString(rawdata, URI.getDefaultProtocolCharset());
  461. } catch (DecoderException e) {
  462. throw new URIException(e.getMessage());
  463. }
  464. }
  465. /**
  466. * Unescape and decode a given string regarded as an escaped string.
  467. *
  468. * @param escaped a string
  469. * @param charset the charset
  470. * @return the unescaped string
  471. *
  472. * @throws URIException if the charset is not supported
  473. *
  474. * @see Coder#decode
  475. */
  476. public static String decode(String escaped, String charset)
  477. throws URIException {
  478. return Coder.decode(escaped.toCharArray(), charset);
  479. }
  480. // ---------------------------------------------------------- Inner classes
  481. /**
  482. * The basic and internal utility for URI escape and character encoding and
  483. * decoding.
  484. *
  485. * @deprecated use org.apache.commons.codec.net.URLCodec
  486. */
  487. protected static class Coder extends URI {
  488. /**
  489. * Escape and encode a given string with allowed characters not to be
  490. * escaped.
  491. *
  492. * @param unescapedComponent an unescaped component
  493. * @param allowed allowed characters not to be escaped
  494. * @param charset the charset to encode
  495. * @return the escaped and encoded string
  496. *
  497. * @throws URIException if the charset is not supported
  498. *
  499. * @deprecated use org.apache.commons.codec.net.URLCodec
  500. */
  501. public static char[] encode(String unescapedComponent, BitSet allowed, String charset)
  502. throws URIException {
  503. return URI.encode(unescapedComponent, allowed, charset);
  504. }
  505. /**
  506. * Unescape and decode a given string.
  507. *
  508. * @param escapedComponent an being-unescaped component
  509. * @param charset the charset to decode
  510. * @return the escaped and encoded string
  511. *
  512. * @throws URIException if the charset is not supported
  513. *
  514. * @deprecated use org.apache.commons.codec.net.URLCodec
  515. */
  516. public static String decode(char[] escapedComponent, String charset)
  517. throws URIException {
  518. return URI.decode(escapedComponent, charset);
  519. }
  520. /**
  521. * Verify whether a given string is escaped or not
  522. *
  523. * @param original given characters
  524. * @return true if the given character array is 7 bit ASCII-compatible.
  525. */
  526. public static boolean verifyEscaped(char[] original) {
  527. for (int i = 0; i < original.length; i++) {
  528. int c = original[i];
  529. if (c > 128) {
  530. return false;
  531. } else if (c == '%') {
  532. if (Character.digit(original[++i], 16) == -1
  533. || Character.digit(original[++i], 16) == -1) {
  534. return false;
  535. }
  536. }
  537. }
  538. return true;
  539. }
  540. /**
  541. * Replace from a given character to given character in an array order
  542. * for a given string.
  543. *
  544. * @param original a given string
  545. * @param from a replacing character array
  546. * @param to a replaced character array
  547. * @return the replaced string
  548. */
  549. public static String replace(String original, char[] from, char[] to) {
  550. for (int i = from.length; i > 0; --i) {
  551. original = replace(original, from[i], to[i]);
  552. }
  553. return original.toString();
  554. }
  555. /**
  556. * Replace from a given character to given character for a given string.
  557. *
  558. * @param original a given string
  559. * @param from a replacing character array
  560. * @param to a replaced character array
  561. * @return the replaced string
  562. */
  563. public static String replace(String original, char from, char to) {
  564. StringBuffer result = new StringBuffer(original.length());
  565. int at, saved = 0;
  566. do {
  567. at = original.indexOf(from);
  568. if (at >= 0) {
  569. result.append(original.substring(0, at));
  570. result.append(to);
  571. } else {
  572. result.append(original.substring(saved));
  573. }
  574. saved = at;
  575. } while (at >= 0);
  576. return result.toString();
  577. }
  578. }
  579. }