1. /*
  2. * @(#)URLDecoder.java 1.27 04/05/18
  3. *
  4. * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
  5. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
  6. */
  7. package java.net;
  8. import java.io.*;
  9. /**
  10. * Utility class for HTML form decoding. This class contains static methods
  11. * for decoding a String from the <CODE>application/x-www-form-urlencoded</CODE>
  12. * MIME format.
  13. * <p>
  14. * The conversion process is the reverse of that used by the URLEncoder class. It is assumed
  15. * that all characters in the encoded string are one of the following:
  16. * "<code>a</code>" through "<code>z</code>",
  17. * "<code>A</code>" through "<code>Z</code>",
  18. * "<code>0</code>" through "<code>9</code>", and
  19. * "<code>-</code>", "<code>_</code>",
  20. * "<code>.</code>", and "<code>*</code>". The
  21. * character "<code>%</code>" is allowed but is interpreted
  22. * as the start of a special escaped sequence.
  23. * <p>
  24. * The following rules are applied in the conversion:
  25. * <p>
  26. * <ul>
  27. * <li>The alphanumeric characters "<code>a</code>" through
  28. * "<code>z</code>", "<code>A</code>" through
  29. * "<code>Z</code>" and "<code>0</code>"
  30. * through "<code>9</code>" remain the same.
  31. * <li>The special characters "<code>.</code>",
  32. * "<code>-</code>", "<code>*</code>", and
  33. * "<code>_</code>" remain the same.
  34. * <li>The plus sign "<code>+</code>" is converted into a
  35. * space character "<code> </code>".
  36. * <li>A sequence of the form "<code>%<i>xy</i></code>" will be
  37. * treated as representing a byte where <i>xy</i> is the two-digit
  38. * hexadecimal representation of the 8 bits. Then, all substrings
  39. * that contain one or more of these byte sequences consecutively
  40. * will be replaced by the character(s) whose encoding would result
  41. * in those consecutive bytes.
  42. * The encoding scheme used to decode these characters may be specified,
  43. * or if unspecified, the default encoding of the platform will be used.
  44. * </ul>
  45. * <p>
  46. * There are two possible ways in which this decoder could deal with
  47. * illegal strings. It could either leave illegal characters alone or
  48. * it could throw an <tt>{@link java.lang.IllegalArgumentException}</tt>.
  49. * Which approach the decoder takes is left to the
  50. * implementation.
  51. *
  52. * @author Mark Chamness
  53. * @author Michael McCloskey
  54. * @version 1.27, 05/18/04
  55. * @since 1.2
  56. */
  57. public class URLDecoder {
  58. // The platform default encoding
  59. static String dfltEncName = URLEncoder.dfltEncName;
  60. /**
  61. * Decodes a <code>x-www-form-urlencoded</code> string.
  62. * The platform's default encoding is used to determine what characters
  63. * are represented by any consecutive sequences of the form
  64. * "<code>%<i>xy</i></code>".
  65. * @param s the <code>String</code> to decode
  66. * @deprecated The resulting string may vary depending on the platform's
  67. * default encoding. Instead, use the decode(String,String) method
  68. * to specify the encoding.
  69. * @return the newly decoded <code>String</code>
  70. */
  71. @Deprecated
  72. public static String decode(String s) {
  73. String str = null;
  74. try {
  75. str = decode(s, dfltEncName);
  76. } catch (UnsupportedEncodingException e) {
  77. // The system should always have the platform default
  78. }
  79. return str;
  80. }
  81. /**
  82. * Decodes a <code>application/x-www-form-urlencoded</code> string using a specific
  83. * encoding scheme.
  84. * The supplied encoding is used to determine
  85. * what characters are represented by any consecutive sequences of the
  86. * form "<code>%<i>xy</i></code>".
  87. * <p>
  88. * <em><strong>Note:</strong> The <a href=
  89. * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
  90. * World Wide Web Consortium Recommendation</a> states that
  91. * UTF-8 should be used. Not doing so may introduce
  92. * incompatibilites.</em>
  93. *
  94. * @param s the <code>String</code> to decode
  95. * @param enc The name of a supported
  96. * <a href="../lang/package-summary.html#charenc">character
  97. * encoding</a>.
  98. * @return the newly decoded <code>String</code>
  99. * @exception UnsupportedEncodingException
  100. * If character encoding needs to be consulted, but
  101. * named character encoding is not supported
  102. * @see URLEncoder#encode(java.lang.String, java.lang.String)
  103. * @since 1.4
  104. */
  105. public static String decode(String s, String enc)
  106. throws UnsupportedEncodingException{
  107. boolean needToChange = false;
  108. int numChars = s.length();
  109. StringBuffer sb = new StringBuffer(numChars > 500 ? numChars / 2 : numChars);
  110. int i = 0;
  111. if (enc.length() == 0) {
  112. throw new UnsupportedEncodingException ("URLDecoder: empty string enc parameter");
  113. }
  114. char c;
  115. byte[] bytes = null;
  116. while (i < numChars) {
  117. c = s.charAt(i);
  118. switch (c) {
  119. case '+':
  120. sb.append(' ');
  121. i++;
  122. needToChange = true;
  123. break;
  124. case '%':
  125. /*
  126. * Starting with this instance of %, process all
  127. * consecutive substrings of the form %xy. Each
  128. * substring %xy will yield a byte. Convert all
  129. * consecutive bytes obtained this way to whatever
  130. * character(s) they represent in the provided
  131. * encoding.
  132. */
  133. try {
  134. // (numChars-i)/3 is an upper bound for the number
  135. // of remaining bytes
  136. if (bytes == null)
  137. bytes = new byte[(numChars-i)/3];
  138. int pos = 0;
  139. while ( ((i+2) < numChars) &&
  140. (c=='%')) {
  141. bytes[pos++] =
  142. (byte)Integer.parseInt(s.substring(i+1,i+3),16);
  143. i+= 3;
  144. if (i < numChars)
  145. c = s.charAt(i);
  146. }
  147. // A trailing, incomplete byte encoding such as
  148. // "%x" will cause an exception to be thrown
  149. if ((i < numChars) && (c=='%'))
  150. throw new IllegalArgumentException(
  151. "URLDecoder: Incomplete trailing escape (%) pattern");
  152. sb.append(new String(bytes, 0, pos, enc));
  153. } catch (NumberFormatException e) {
  154. throw new IllegalArgumentException(
  155. "URLDecoder: Illegal hex characters in escape (%) pattern - "
  156. + e.getMessage());
  157. }
  158. needToChange = true;
  159. break;
  160. default:
  161. sb.append(c);
  162. i++;
  163. break;
  164. }
  165. }
  166. return (needToChange? sb.toString() : s);
  167. }
  168. }