1. /*
  2. * @(#)URLDecoder.java 1.23 03/01/23
  3. *
  4. * Copyright 2003 Sun Microsystems, Inc. All rights reserved.
  5. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
  6. */
  7. package java.net;
  8. import java.io.*;
  9. /**
  10. * Utility class for HTML form decoding. This class contains static methods
  11. * for decoding a String from the <CODE>application/x-www-form-urlencoded</CODE>
  12. * MIME format.
  13. * <p>
  14. * The conversion process is the reverse of that used by the URLEncoder class. It is assumed
  15. * that all characters in the encoded string are one of the following:
  16. * "<code>a</code>" through "<code>z</code>",
  17. * "<code>A</code>" through "<code>Z</code>",
  18. * "<code>0</code>" through "<code>9</code>", and
  19. * "<code>-</code>", "<code>_</code>",
  20. * "<code>.</code>", and "<code>*</code>". The
  21. * character "<code>%</code>" is allowed but is interpreted
  22. * as the start of a special escaped sequence.
  23. * <p>
  24. * The following rules are applied in the conversion:
  25. * <p>
  26. * <ul>
  27. * <li>The alphanumeric characters "<code>a</code>" through
  28. * "<code>z</code>", "<code>A</code>" through
  29. * "<code>Z</code>" and "<code>0</code>"
  30. * through "<code>9</code>" remain the same.
  31. * <li>The special characters "<code>.</code>",
  32. * "<code>-</code>", "<code>*</code>", and
  33. * "<code>_</code>" remain the same.
  34. * <li>The plus sign "<code>+</code>" is converted into a
  35. * space character "<code> </code>" .
  36. * <li>A sequence of the form "<code>%<i>xy</i></code>" will be
  37. * treated as representing a byte where <i>xy</i> is the two-digit
  38. * hexadecimal representation of the 8 bits. Then, all substrings
  39. * that contain one or more of these byte sequences consecutively
  40. * will be replaced by the character(s) whose encoding would result
  41. * in those consecutive bytes.
  42. * The encoding scheme used to decode these characters may be specified,
  43. * or if unspecified, the default encoding of the platform will be used.
  44. * </ul>
  45. * <p>
  46. * There are two possible ways in which this decoder could deal with
  47. * illegal strings. It could either leave illegal characters alone or
  48. * it could throw an <tt>{@link java.lang.IllegalArgumentException}</tt>.
  49. * Which approach the decoder takes is left to the
  50. * implementation.
  51. *
  52. * @author Mark Chamness
  53. * @author Michael McCloskey
  54. * @version 1.23, 01/23/03
  55. * @since 1.2
  56. */
  57. public class URLDecoder {
  58. // The platform default encoding
  59. static String dfltEncName = URLEncoder.dfltEncName;
  60. /**
  61. * Decodes a <code>x-www-form-urlencoded</code> string.
  62. * The platform's default encoding is used to determine what characters
  63. * are represented by any consecutive sequences of the form
  64. * "<code>%<i>xy</i></code>".
  65. * @param s the <code>String</code> to decode
  66. * @deprecated The resulting string may vary depending on the platform's
  67. * default encoding. Instead, use the decode(String,String) method
  68. * to specify the encoding.
  69. * @return the newly decoded <code>String</code>
  70. */
  71. public static String decode(String s) {
  72. String str = null;
  73. try {
  74. str = decode(s, dfltEncName);
  75. } catch (UnsupportedEncodingException e) {
  76. // The system should always have the platform default
  77. }
  78. return str;
  79. }
  80. /**
  81. * Decodes a <code>application/x-www-form-urlencoded</code> string using a specific
  82. * encoding scheme.
  83. * The supplied encoding is used to determine
  84. * what characters are represented by any consecutive sequences of the
  85. * form "<code>%<i>xy</i></code>".
  86. * <p>
  87. * <em><strong>Note:</strong> The <a href=
  88. * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
  89. * World Wide Web Consortium Recommendation</a> states that
  90. * UTF-8 should be used. Not doing so may introduce
  91. * incompatibilites.</em>
  92. *
  93. * @param s the <code>String</code> to decode
  94. * @param enc The name of a supported
  95. * <a href="../lang/package-summary.html#charenc">character
  96. * encoding</a>.
  97. * @return the newly decoded <code>String</code>
  98. * @exception UnsupportedEncodingException
  99. * If the named encoding is not supported
  100. * @see URLEncoder#encode(java.lang.String, java.lang.String)
  101. * @since 1.4
  102. */
  103. public static String decode(String s, String enc)
  104. throws UnsupportedEncodingException{
  105. boolean needToChange = false;
  106. StringBuffer sb = new StringBuffer();
  107. int numChars = s.length();
  108. int i = 0;
  109. if (enc.length() == 0) {
  110. throw new UnsupportedEncodingException ("URLDecoder: empty string enc parameter");
  111. }
  112. while (i < numChars) {
  113. char c = s.charAt(i);
  114. switch (c) {
  115. case '+':
  116. sb.append(' ');
  117. i++;
  118. needToChange = true;
  119. break;
  120. case '%':
  121. /*
  122. * Starting with this instance of %, process all
  123. * consecutive substrings of the form %xy. Each
  124. * substring %xy will yield a byte. Convert all
  125. * consecutive bytes obtained this way to whatever
  126. * character(s) they represent in the provided
  127. * encoding.
  128. */
  129. try {
  130. // (numChars-i)/3 is an upper bound for the number
  131. // of remaining bytes
  132. byte[] bytes = new byte[(numChars-i)/3];
  133. int pos = 0;
  134. while ( ((i+2) < numChars) &&
  135. (c=='%')) {
  136. bytes[pos++] =
  137. (byte)Integer.parseInt(s.substring(i+1,i+3),16);
  138. i+= 3;
  139. if (i < numChars)
  140. c = s.charAt(i);
  141. }
  142. // A trailing, incomplete byte encoding such as
  143. // "%x" will cause an exception to be thrown
  144. if ((i < numChars) && (c=='%'))
  145. throw new IllegalArgumentException(
  146. "URLDecoder: Incomplete trailing escape (%) pattern");
  147. sb.append(new String(bytes, 0, pos, enc));
  148. } catch (NumberFormatException e) {
  149. throw new IllegalArgumentException(
  150. "URLDecoder: Illegal hex characters in escape (%) pattern - "
  151. + e.getMessage());
  152. }
  153. needToChange = true;
  154. break;
  155. default:
  156. sb.append(c);
  157. i++;
  158. break;
  159. }
  160. }
  161. return (needToChange? sb.toString() : s);
  162. }
  163. }