/*
 * $Header: /home/cvs/jakarta-commons/httpclient/src/java/org/apache/commons/httpclient/util/EncodingUtil.java,v 1.8 2004/05/13 04:01:22 mbecke Exp $
 * $Revision: 1.8 $
 * $Date: 2004/05/13 04:01:22 $
 *
 * ====================================================================
 *
 * Copyright 1999-2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation. For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 *
 */
  29. package org.apache.commons.httpclient.util;
  30. import java.io.UnsupportedEncodingException;
  31. import org.apache.commons.codec.net.URLCodec;
  32. import org.apache.commons.httpclient.HttpClientError;
  33. import org.apache.commons.httpclient.NameValuePair;
  34. import org.apache.commons.logging.Log;
  35. import org.apache.commons.logging.LogFactory;
  36. /**
  37. * The home for utility methods that handle various encoding tasks.
  38. *
  39. * @author Michael Becke
  40. * @author <a href="mailto:oleg@ural.ru">Oleg Kalnichevski</a>
  41. *
  42. * @since 2.0 final
  43. */
  44. public class EncodingUtil {
  45. /** Default content encoding chatset */
  46. private static final String DEFAULT_CHARSET = "ISO-8859-1";
  47. /** Log object for this class. */
  48. private static final Log LOG = LogFactory.getLog(EncodingUtil.class);
  49. /**
  50. * Form-urlencoding routine.
  51. *
  52. * The default encoding for all forms is `application/x-www-form-urlencoded'.
  53. * A form data set is represented in this media type as follows:
  54. *
  55. * The form field names and values are escaped: space characters are replaced
  56. * by `+', and then reserved characters are escaped as per [URL]; that is,
  57. * non-alphanumeric characters are replaced by `%HH', a percent sign and two
  58. * hexadecimal digits representing the ASCII code of the character. Line breaks,
  59. * as in multi-line text field values, are represented as CR LF pairs, i.e. `%0D%0A'.
  60. *
  61. * <p>
  62. * if the given charset is not supported, ISO-8859-1 is used instead.
  63. * </p>
  64. *
  65. * @param pairs the values to be encoded
  66. * @param charset the character set of pairs to be encoded
  67. *
  68. * @return the urlencoded pairs
  69. *
  70. * @since 2.0 final
  71. */
  72. public static String formUrlEncode(NameValuePair[] pairs, String charset) {
  73. try {
  74. return doFormUrlEncode(pairs, charset);
  75. } catch (UnsupportedEncodingException e) {
  76. LOG.error("Encoding not supported: " + charset);
  77. try {
  78. return doFormUrlEncode(pairs, DEFAULT_CHARSET);
  79. } catch (UnsupportedEncodingException fatal) {
  80. // Should never happen. ISO-8859-1 must be supported on all JVMs
  81. throw new HttpClientError("Encoding not supported: " +
  82. DEFAULT_CHARSET);
  83. }
  84. }
  85. }
  86. /**
  87. * Form-urlencoding routine.
  88. *
  89. * The default encoding for all forms is `application/x-www-form-urlencoded'.
  90. * A form data set is represented in this media type as follows:
  91. *
  92. * The form field names and values are escaped: space characters are replaced
  93. * by `+', and then reserved characters are escaped as per [URL]; that is,
  94. * non-alphanumeric characters are replaced by `%HH', a percent sign and two
  95. * hexadecimal digits representing the ASCII code of the character. Line breaks,
  96. * as in multi-line text field values, are represented as CR LF pairs, i.e. `%0D%0A'.
  97. *
  98. * @param pairs the values to be encoded
  99. * @param charset the character set of pairs to be encoded
  100. *
  101. * @return the urlencoded pairs
  102. * @throws UnsupportedEncodingException if charset is not supported
  103. *
  104. * @since 2.0 final
  105. */
  106. private static String doFormUrlEncode(NameValuePair[] pairs, String charset)
  107. throws UnsupportedEncodingException
  108. {
  109. StringBuffer buf = new StringBuffer();
  110. for (int i = 0; i < pairs.length; i++) {
  111. URLCodec codec = new URLCodec();
  112. NameValuePair pair = pairs[i];
  113. if (pair.getName() != null) {
  114. if (i > 0) {
  115. buf.append("&");
  116. }
  117. buf.append(codec.encode(pair.getName(), charset));
  118. buf.append("=");
  119. if (pair.getValue() != null) {
  120. buf.append(codec.encode(pair.getValue(), charset));
  121. }
  122. }
  123. }
  124. return buf.toString();
  125. }
  126. /**
  127. * Converts the byte array of HTTP content characters to a string. If
  128. * the specified charset is not supported, default system encoding
  129. * is used.
  130. *
  131. * @param data the byte array to be encoded
  132. * @param offset the index of the first byte to encode
  133. * @param length the number of bytes to encode
  134. * @param charset the desired character encoding
  135. * @return The result of the conversion.
  136. *
  137. * @since 3.0
  138. */
  139. public static String getString(
  140. final byte[] data,
  141. int offset,
  142. int length,
  143. String charset
  144. ) {
  145. if (data == null) {
  146. throw new IllegalArgumentException("Parameter may not be null");
  147. }
  148. if (charset == null || charset.length() == 0) {
  149. throw new IllegalArgumentException("charset may not be null or empty");
  150. }
  151. try {
  152. return new String(data, offset, length, charset);
  153. } catch (UnsupportedEncodingException e) {
  154. if (LOG.isWarnEnabled()) {
  155. LOG.warn("Unsupported encoding: " + charset + ". System encoding used");
  156. }
  157. return new String(data, offset, length);
  158. }
  159. }
  160. /**
  161. * Converts the byte array of HTTP content characters to a string. If
  162. * the specified charset is not supported, default system encoding
  163. * is used.
  164. *
  165. * @param data the byte array to be encoded
  166. * @param charset the desired character encoding
  167. * @return The result of the conversion.
  168. *
  169. * @since 3.0
  170. */
  171. public static String getString(final byte[] data, String charset) {
  172. return getString(data, 0, data.length, charset);
  173. }
  174. /**
  175. * Converts the specified string to a byte array. If the charset is not supported the
  176. * default system charset is used.
  177. *
  178. * @param data the string to be encoded
  179. * @param charset the desired character encoding
  180. * @return The resulting byte array.
  181. *
  182. * @since 3.0
  183. */
  184. public static byte[] getBytes(final String data, String charset) {
  185. if (data == null) {
  186. throw new IllegalArgumentException("data may not be null");
  187. }
  188. if (charset == null || charset.length() == 0) {
  189. throw new IllegalArgumentException("charset may not be null or empty");
  190. }
  191. try {
  192. return data.getBytes(charset);
  193. } catch (UnsupportedEncodingException e) {
  194. if (LOG.isWarnEnabled()) {
  195. LOG.warn("Unsupported encoding: " + charset + ". System encoding used.");
  196. }
  197. return data.getBytes();
  198. }
  199. }
  200. /**
  201. * Converts the specified string to byte array of ASCII characters.
  202. *
  203. * @param data the string to be encoded
  204. * @return The string as a byte array.
  205. *
  206. * @since 3.0
  207. */
  208. public static byte[] getAsciiBytes(final String data) {
  209. if (data == null) {
  210. throw new IllegalArgumentException("Parameter may not be null");
  211. }
  212. try {
  213. return data.getBytes("US-ASCII");
  214. } catch (UnsupportedEncodingException e) {
  215. throw new HttpClientError("HttpClient requires ASCII support");
  216. }
  217. }
  218. /**
  219. * Converts the byte array of ASCII characters to a string. This method is
  220. * to be used when decoding content of HTTP elements (such as response
  221. * headers)
  222. *
  223. * @param data the byte array to be encoded
  224. * @param offset the index of the first byte to encode
  225. * @param length the number of bytes to encode
  226. * @return The string representation of the byte array
  227. *
  228. * @since 3.0
  229. */
  230. public static String getAsciiString(final byte[] data, int offset, int length) {
  231. if (data == null) {
  232. throw new IllegalArgumentException("Parameter may not be null");
  233. }
  234. try {
  235. return new String(data, offset, length, "US-ASCII");
  236. } catch (UnsupportedEncodingException e) {
  237. throw new HttpClientError("HttpClient requires ASCII support");
  238. }
  239. }
  240. /**
  241. * Converts the byte array of ASCII characters to a string. This method is
  242. * to be used when decoding content of HTTP elements (such as response
  243. * headers)
  244. *
  245. * @param data the byte array to be encoded
  246. * @return The string representation of the byte array
  247. *
  248. * @since 3.0
  249. */
  250. public static String getAsciiString(final byte[] data) {
  251. return getAsciiString(data, 0, data.length);
  252. }
  253. /**
  254. * This class should not be instantiated.
  255. */
  256. private EncodingUtil() {
  257. }
  258. }