1. /*
  2. * Copyright 2001-2004 The Apache Software Foundation.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. package org.apache.commons.codec.net;
  17. import java.io.ByteArrayOutputStream;
  18. import java.io.UnsupportedEncodingException;
  19. import java.util.BitSet;
  20. import org.apache.commons.codec.BinaryDecoder;
  21. import org.apache.commons.codec.BinaryEncoder;
  22. import org.apache.commons.codec.DecoderException;
  23. import org.apache.commons.codec.EncoderException;
  24. import org.apache.commons.codec.StringDecoder;
  25. import org.apache.commons.codec.StringEncoder;
  26. /**
  27. * <p>Implements the 'www-form-urlencoded' encoding scheme,
  28. * also misleadingly known as URL encoding.</p>
  29. *
  30. * <p>For more detailed information please refer to
  31. * <a href="http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1">
  32. * Chapter 17.13.4 'Form content types'</a> of the
  33. * <a href="http://www.w3.org/TR/html4/">HTML 4.01 Specification<a></p>
  34. *
  35. * <p>
  36. * This codec is meant to be a replacement for standard Java classes
  37. * {@link java.net.URLEncoder} and {@link java.net.URLDecoder}
  38. * on older Java platforms, as these classes in Java versions below
  39. * 1.4 rely on the platform's default charset encoding.
  40. * </p>
  41. *
  42. * @author Apache Software Foundation
  43. * @since 1.2
  44. * @version $Id: URLCodec.java,v 1.19 2004/03/29 07:59:00 ggregory Exp $
  45. */
  46. public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
  47. /**
  48. * The default charset used for string decoding and encoding.
  49. */
  50. protected String charset = StringEncodings.UTF8;
  51. protected static byte ESCAPE_CHAR = '%';
  52. /**
  53. * BitSet of www-form-url safe characters.
  54. */
  55. protected static final BitSet WWW_FORM_URL = new BitSet(256);
  56. // Static initializer for www_form_url
  57. static {
  58. // alpha characters
  59. for (int i = 'a'; i <= 'z'; i++) {
  60. WWW_FORM_URL.set(i);
  61. }
  62. for (int i = 'A'; i <= 'Z'; i++) {
  63. WWW_FORM_URL.set(i);
  64. }
  65. // numeric characters
  66. for (int i = '0'; i <= '9'; i++) {
  67. WWW_FORM_URL.set(i);
  68. }
  69. // special chars
  70. WWW_FORM_URL.set('-');
  71. WWW_FORM_URL.set('_');
  72. WWW_FORM_URL.set('.');
  73. WWW_FORM_URL.set('*');
  74. // blank to be replaced with +
  75. WWW_FORM_URL.set(' ');
  76. }
  77. /**
  78. * Default constructor.
  79. */
  80. public URLCodec() {
  81. super();
  82. }
  83. /**
  84. * Constructor which allows for the selection of a default charset
  85. *
  86. * @param charset the default string charset to use.
  87. */
  88. public URLCodec(String charset) {
  89. super();
  90. this.charset = charset;
  91. }
  92. /**
  93. * Encodes an array of bytes into an array of URL safe 7-bit
  94. * characters. Unsafe characters are escaped.
  95. *
  96. * @param urlsafe bitset of characters deemed URL safe
  97. * @param bytes array of bytes to convert to URL safe characters
  98. * @return array of bytes containing URL safe characters
  99. */
  100. public static final byte[] encodeUrl(BitSet urlsafe, byte[] bytes)
  101. {
  102. if (bytes == null) {
  103. return null;
  104. }
  105. if (urlsafe == null) {
  106. urlsafe = WWW_FORM_URL;
  107. }
  108. ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  109. for (int i = 0; i < bytes.length; i++) {
  110. int b = bytes[i];
  111. if (b < 0) {
  112. b = 256 + b;
  113. }
  114. if (urlsafe.get(b)) {
  115. if (b == ' ') {
  116. b = '+';
  117. }
  118. buffer.write(b);
  119. } else {
  120. buffer.write('%');
  121. char hex1 = Character.toUpperCase(
  122. Character.forDigit((b >> 4) & 0xF, 16));
  123. char hex2 = Character.toUpperCase(
  124. Character.forDigit(b & 0xF, 16));
  125. buffer.write(hex1);
  126. buffer.write(hex2);
  127. }
  128. }
  129. return buffer.toByteArray();
  130. }
  131. /**
  132. * Decodes an array of URL safe 7-bit characters into an array of
  133. * original bytes. Escaped characters are converted back to their
  134. * original representation.
  135. *
  136. * @param bytes array of URL safe characters
  137. * @return array of original bytes
  138. * @throws DecoderException Thrown if URL decoding is unsuccessful
  139. */
  140. public static final byte[] decodeUrl(byte[] bytes)
  141. throws DecoderException
  142. {
  143. if (bytes == null) {
  144. return null;
  145. }
  146. ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  147. for (int i = 0; i < bytes.length; i++) {
  148. int b = bytes[i];
  149. if (b == '+') {
  150. buffer.write(' ');
  151. } else if (b == '%') {
  152. try {
  153. int u = Character.digit((char)bytes[++i], 16);
  154. int l = Character.digit((char)bytes[++i], 16);
  155. if (u == -1 || l == -1) {
  156. throw new DecoderException("Invalid URL encoding");
  157. }
  158. buffer.write((char)((u << 4) + l));
  159. } catch(ArrayIndexOutOfBoundsException e) {
  160. throw new DecoderException("Invalid URL encoding");
  161. }
  162. } else {
  163. buffer.write(b);
  164. }
  165. }
  166. return buffer.toByteArray();
  167. }
  168. /**
  169. * Encodes an array of bytes into an array of URL safe 7-bit
  170. * characters. Unsafe characters are escaped.
  171. *
  172. * @param bytes array of bytes to convert to URL safe characters
  173. * @return array of bytes containing URL safe characters
  174. */
  175. public byte[] encode(byte[] bytes) {
  176. return encodeUrl(WWW_FORM_URL, bytes);
  177. }
  178. /**
  179. * Decodes an array of URL safe 7-bit characters into an array of
  180. * original bytes. Escaped characters are converted back to their
  181. * original representation.
  182. *
  183. * @param bytes array of URL safe characters
  184. * @return array of original bytes
  185. * @throws DecoderException Thrown if URL decoding is unsuccessful
  186. */
  187. public byte[] decode(byte[] bytes) throws DecoderException {
  188. return decodeUrl(bytes);
  189. }
  190. /**
  191. * Encodes a string into its URL safe form using the specified
  192. * string charset. Unsafe characters are escaped.
  193. *
  194. * @param pString string to convert to a URL safe form
  195. * @param charset the charset for pString
  196. * @return URL safe string
  197. * @throws UnsupportedEncodingException Thrown if charset is not
  198. * supported
  199. */
  200. public String encode(String pString, String charset)
  201. throws UnsupportedEncodingException
  202. {
  203. if (pString == null) {
  204. return null;
  205. }
  206. return new String(encode(pString.getBytes(charset)), StringEncodings.US_ASCII);
  207. }
  208. /**
  209. * Encodes a string into its URL safe form using the default string
  210. * charset. Unsafe characters are escaped.
  211. *
  212. * @param pString string to convert to a URL safe form
  213. * @return URL safe string
  214. * @throws EncoderException Thrown if URL encoding is unsuccessful
  215. *
  216. * @see #getDefaultCharset()
  217. */
  218. public String encode(String pString) throws EncoderException {
  219. if (pString == null) {
  220. return null;
  221. }
  222. try {
  223. return encode(pString, getDefaultCharset());
  224. } catch(UnsupportedEncodingException e) {
  225. throw new EncoderException(e.getMessage());
  226. }
  227. }
  228. /**
  229. * Decodes a URL safe string into its original form using the
  230. * specified encoding. Escaped characters are converted back
  231. * to their original representation.
  232. *
  233. * @param pString URL safe string to convert into its original form
  234. * @param charset the original string charset
  235. * @return original string
  236. * @throws DecoderException Thrown if URL decoding is unsuccessful
  237. * @throws UnsupportedEncodingException Thrown if charset is not
  238. * supported
  239. */
  240. public String decode(String pString, String charset)
  241. throws DecoderException, UnsupportedEncodingException
  242. {
  243. if (pString == null) {
  244. return null;
  245. }
  246. return new String(decode(pString.getBytes(StringEncodings.US_ASCII)), charset);
  247. }
  248. /**
  249. * Decodes a URL safe string into its original form using the default
  250. * string charset. Escaped characters are converted back to their
  251. * original representation.
  252. *
  253. * @param pString URL safe string to convert into its original form
  254. * @return original string
  255. * @throws DecoderException Thrown if URL decoding is unsuccessful
  256. *
  257. * @see #getDefaultCharset()
  258. */
  259. public String decode(String pString) throws DecoderException {
  260. if (pString == null) {
  261. return null;
  262. }
  263. try {
  264. return decode(pString, getDefaultCharset());
  265. } catch(UnsupportedEncodingException e) {
  266. throw new DecoderException(e.getMessage());
  267. }
  268. }
  269. /**
  270. * Encodes an object into its URL safe form. Unsafe characters are
  271. * escaped.
  272. *
  273. * @param pObject string to convert to a URL safe form
  274. * @return URL safe object
  275. * @throws EncoderException Thrown if URL encoding is not
  276. * applicable to objects of this type or
  277. * if encoding is unsuccessful
  278. */
  279. public Object encode(Object pObject) throws EncoderException {
  280. if (pObject == null) {
  281. return null;
  282. } else if (pObject instanceof byte[]) {
  283. return encode((byte[])pObject);
  284. } else if (pObject instanceof String) {
  285. return encode((String)pObject);
  286. } else {
  287. throw new EncoderException("Objects of type " +
  288. pObject.getClass().getName() + " cannot be URL encoded");
  289. }
  290. }
  291. /**
  292. * Decodes a URL safe object into its original form. Escaped
  293. * characters are converted back to their original representation.
  294. *
  295. * @param pObject URL safe object to convert into its original form
  296. * @return original object
  297. * @throws DecoderException Thrown if URL decoding is not
  298. * applicable to objects of this type
  299. * if decoding is unsuccessful
  300. */
  301. public Object decode(Object pObject) throws DecoderException {
  302. if (pObject == null) {
  303. return null;
  304. } else if (pObject instanceof byte[]) {
  305. return decode((byte[])pObject);
  306. } else if (pObject instanceof String) {
  307. return decode((String)pObject);
  308. } else {
  309. throw new DecoderException("Objects of type " +
  310. pObject.getClass().getName() + " cannot be URL decoded");
  311. }
  312. }
  313. /**
  314. * The <code>String</code> encoding used for decoding and encoding.
  315. *
  316. * @return Returns the encoding.
  317. *
  318. * @deprecated use #getDefaultCharset()
  319. */
  320. public String getEncoding() {
  321. return this.charset;
  322. }
  323. /**
  324. * The default charset used for string decoding and encoding.
  325. *
  326. * @return the default string charset.
  327. */
  328. public String getDefaultCharset() {
  329. return this.charset;
  330. }
  331. }