1. /*
  2. * Copyright 2001-2004 The Apache Software Foundation.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. package org.apache.commons.codec.net;
  17. import java.io.ByteArrayOutputStream;
  18. import java.io.UnsupportedEncodingException;
  19. import java.util.BitSet;
  20. import org.apache.commons.codec.BinaryDecoder;
  21. import org.apache.commons.codec.BinaryEncoder;
  22. import org.apache.commons.codec.DecoderException;
  23. import org.apache.commons.codec.EncoderException;
  24. import org.apache.commons.codec.StringDecoder;
  25. import org.apache.commons.codec.StringEncoder;
  26. /**
  27. * <p>
  28. * Codec for the Quoted-Printable section of <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521 </a>.
  29. * </p>
  30. * <p>
  31. * The Quoted-Printable encoding is intended to represent data that largely consists of octets that correspond to
  32. * printable characters in the ASCII character set. It encodes the data in such a way that the resulting octets are
  33. * unlikely to be modified by mail transport. If the data being encoded are mostly ASCII text, the encoded form of the
  34. * data remains largely recognizable by humans. A body which is entirely ASCII may also be encoded in Quoted-Printable
  35. * to ensure the integrity of the data should the message pass through a character- translating, and/or line-wrapping
  36. * gateway.
  37. * </p>
  38. *
  39. * <p>
  40. * Note:
  41. * </p>
  42. * <p>
  43. * Rules #3, #4, and #5 of the quoted-printable spec are not implemented yet because the complete quoted-printable spec
  44. * does not lend itself well into the byte[] oriented codec framework. Complete the codec once the steamable codec
  45. * framework is ready. The motivation behind providing the codec in a partial form is that it can already come in handy
  46. * for those applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance Q codec.
  47. * </p>
  48. *
  49. * @see <a href="http://www.ietf.org/rfc/rfc1521.txt"> RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One:
  50. * Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a>
  51. *
  52. * @author Apache Software Foundation
  53. * @since 1.3
  54. * @version $Id: QuotedPrintableCodec.java,v 1.7 2004/04/09 22:21:07 ggregory Exp $
  55. */
  56. public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
  57. /**
  58. * The default charset used for string decoding and encoding.
  59. */
  60. private String charset = StringEncodings.UTF8;
  61. /**
  62. * BitSet of printable characters as defined in RFC 1521.
  63. */
  64. private static final BitSet PRINTABLE_CHARS = new BitSet(256);
  65. private static byte ESCAPE_CHAR = '=';
  66. private static byte TAB = 9;
  67. private static byte SPACE = 32;
  68. // Static initializer for printable chars collection
  69. static {
  70. // alpha characters
  71. for (int i = 33; i <= 60; i++) {
  72. PRINTABLE_CHARS.set(i);
  73. }
  74. for (int i = 62; i <= 126; i++) {
  75. PRINTABLE_CHARS.set(i);
  76. }
  77. PRINTABLE_CHARS.set(TAB);
  78. PRINTABLE_CHARS.set(SPACE);
  79. }
  80. /**
  81. * Default constructor.
  82. */
  83. public QuotedPrintableCodec() {
  84. super();
  85. }
  86. /**
  87. * Constructor which allows for the selection of a default charset
  88. *
  89. * @param charset
  90. * the default string charset to use.
  91. */
  92. public QuotedPrintableCodec(String charset) {
  93. super();
  94. this.charset = charset;
  95. }
  96. /**
  97. * Encodes byte into its quoted-printable representation.
  98. *
  99. * @param b
  100. * byte to encode
  101. * @param buffer
  102. * the buffer to write to
  103. */
  104. private static final void encodeQuotedPrintable(int b, ByteArrayOutputStream buffer) {
  105. buffer.write(ESCAPE_CHAR);
  106. char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
  107. char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16));
  108. buffer.write(hex1);
  109. buffer.write(hex2);
  110. }
  111. /**
  112. * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
  113. *
  114. * <p>
  115. * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
  116. * RFC 1521 and is suitable for encoding binary data and unformatted text.
  117. * </p>
  118. *
  119. * @param printable
  120. * bitset of characters deemed quoted-printable
  121. * @param bytes
  122. * array of bytes to be encoded
  123. * @return array of bytes containing quoted-printable data
  124. */
  125. public static final byte[] encodeQuotedPrintable(BitSet printable, byte[] bytes) {
  126. if (bytes == null) {
  127. return null;
  128. }
  129. if (printable == null) {
  130. printable = PRINTABLE_CHARS;
  131. }
  132. ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  133. for (int i = 0; i < bytes.length; i++) {
  134. int b = bytes[i];
  135. if (b < 0) {
  136. b = 256 + b;
  137. }
  138. if (printable.get(b)) {
  139. buffer.write(b);
  140. } else {
  141. encodeQuotedPrintable(b, buffer);
  142. }
  143. }
  144. return buffer.toByteArray();
  145. }
  146. /**
  147. * Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are converted
  148. * back to their original representation.
  149. *
  150. * <p>
  151. * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
  152. * RFC 1521.
  153. * </p>
  154. *
  155. * @param bytes
  156. * array of quoted-printable characters
  157. * @return array of original bytes
  158. * @throws DecoderException
  159. * Thrown if quoted-printable decoding is unsuccessful
  160. */
  161. public static final byte[] decodeQuotedPrintable(byte[] bytes) throws DecoderException {
  162. if (bytes == null) {
  163. return null;
  164. }
  165. ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  166. for (int i = 0; i < bytes.length; i++) {
  167. int b = bytes[i];
  168. if (b == ESCAPE_CHAR) {
  169. try {
  170. int u = Character.digit((char) bytes[++i], 16);
  171. int l = Character.digit((char) bytes[++i], 16);
  172. if (u == -1 || l == -1) {
  173. throw new DecoderException("Invalid quoted-printable encoding");
  174. }
  175. buffer.write((char) ((u << 4) + l));
  176. } catch (ArrayIndexOutOfBoundsException e) {
  177. throw new DecoderException("Invalid quoted-printable encoding");
  178. }
  179. } else {
  180. buffer.write(b);
  181. }
  182. }
  183. return buffer.toByteArray();
  184. }
  185. /**
  186. * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
  187. *
  188. * <p>
  189. * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
  190. * RFC 1521 and is suitable for encoding binary data and unformatted text.
  191. * </p>
  192. *
  193. * @param bytes
  194. * array of bytes to be encoded
  195. * @return array of bytes containing quoted-printable data
  196. */
  197. public byte[] encode(byte[] bytes) {
  198. return encodeQuotedPrintable(PRINTABLE_CHARS, bytes);
  199. }
  200. /**
  201. * Decodes an array of quoted-printable characters into an array of original bytes. Escaped characters are converted
  202. * back to their original representation.
  203. *
  204. * <p>
  205. * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
  206. * RFC 1521.
  207. * </p>
  208. *
  209. * @param bytes
  210. * array of quoted-printable characters
  211. * @return array of original bytes
  212. * @throws DecoderException
  213. * Thrown if quoted-printable decoding is unsuccessful
  214. */
  215. public byte[] decode(byte[] bytes) throws DecoderException {
  216. return decodeQuotedPrintable(bytes);
  217. }
  218. /**
  219. * Encodes a string into its quoted-printable form using the default string charset. Unsafe characters are escaped.
  220. *
  221. * <p>
  222. * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
  223. * RFC 1521 and is suitable for encoding binary data.
  224. * </p>
  225. *
  226. * @param pString
  227. * string to convert to quoted-printable form
  228. * @return quoted-printable string
  229. *
  230. * @throws EncoderException
  231. * Thrown if quoted-printable encoding is unsuccessful
  232. *
  233. * @see #getDefaultCharset()
  234. */
  235. public String encode(String pString) throws EncoderException {
  236. if (pString == null) {
  237. return null;
  238. }
  239. try {
  240. return encode(pString, getDefaultCharset());
  241. } catch (UnsupportedEncodingException e) {
  242. throw new EncoderException(e.getMessage());
  243. }
  244. }
  245. /**
  246. * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters
  247. * are converted back to their original representation.
  248. *
  249. * @param pString
  250. * quoted-printable string to convert into its original form
  251. * @param charset
  252. * the original string charset
  253. * @return original string
  254. * @throws DecoderException
  255. * Thrown if quoted-printable decoding is unsuccessful
  256. * @throws UnsupportedEncodingException
  257. * Thrown if charset is not supported
  258. */
  259. public String decode(String pString, String charset) throws DecoderException, UnsupportedEncodingException {
  260. if (pString == null) {
  261. return null;
  262. }
  263. return new String(decode(pString.getBytes(StringEncodings.US_ASCII)), charset);
  264. }
  265. /**
  266. * Decodes a quoted-printable string into its original form using the default string charset. Escaped characters are
  267. * converted back to their original representation.
  268. *
  269. * @param pString
  270. * quoted-printable string to convert into its original form
  271. * @return original string
  272. * @throws DecoderException
  273. * Thrown if quoted-printable decoding is unsuccessful
  274. * @throws UnsupportedEncodingException
  275. * Thrown if charset is not supported
  276. * @see #getDefaultCharset()
  277. */
  278. public String decode(String pString) throws DecoderException {
  279. if (pString == null) {
  280. return null;
  281. }
  282. try {
  283. return decode(pString, getDefaultCharset());
  284. } catch (UnsupportedEncodingException e) {
  285. throw new DecoderException(e.getMessage());
  286. }
  287. }
  288. /**
  289. * Encodes an object into its quoted-printable safe form. Unsafe characters are escaped.
  290. *
  291. * @param pObject
  292. * string to convert to a quoted-printable form
  293. * @return quoted-printable object
  294. * @throws EncoderException
  295. * Thrown if quoted-printable encoding is not applicable to objects of this type or if encoding is
  296. * unsuccessful
  297. */
  298. public Object encode(Object pObject) throws EncoderException {
  299. if (pObject == null) {
  300. return null;
  301. } else if (pObject instanceof byte[]) {
  302. return encode((byte[]) pObject);
  303. } else if (pObject instanceof String) {
  304. return encode((String) pObject);
  305. } else {
  306. throw new EncoderException("Objects of type "
  307. + pObject.getClass().getName()
  308. + " cannot be quoted-printable encoded");
  309. }
  310. }
  311. /**
  312. * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original
  313. * representation.
  314. *
  315. * @param pObject
  316. * quoted-printable object to convert into its original form
  317. * @return original object
  318. * @throws DecoderException
  319. * Thrown if quoted-printable decoding is not applicable to objects of this type if decoding is
  320. * unsuccessful
  321. */
  322. public Object decode(Object pObject) throws DecoderException {
  323. if (pObject == null) {
  324. return null;
  325. } else if (pObject instanceof byte[]) {
  326. return decode((byte[]) pObject);
  327. } else if (pObject instanceof String) {
  328. return decode((String) pObject);
  329. } else {
  330. throw new DecoderException("Objects of type "
  331. + pObject.getClass().getName()
  332. + " cannot be quoted-printable decoded");
  333. }
  334. }
  335. /**
  336. * Returns the default charset used for string decoding and encoding.
  337. *
  338. * @return the default string charset.
  339. */
  340. public String getDefaultCharset() {
  341. return this.charset;
  342. }
  343. /**
  344. * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
  345. *
  346. * <p>
  347. * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
  348. * RFC 1521 and is suitable for encoding binary data and unformatted text.
  349. * </p>
  350. *
  351. * @param pString
  352. * string to convert to quoted-printable form
  353. * @param charset
  354. * the charset for pString
  355. * @return quoted-printable string
  356. *
  357. * @throws UnsupportedEncodingException
  358. * Thrown if the charset is not supported
  359. */
  360. public String encode(String pString, String charset) throws UnsupportedEncodingException {
  361. if (pString == null) {
  362. return null;
  363. }
  364. return new String(encode(pString.getBytes(charset)), StringEncodings.US_ASCII);
  365. }
  366. }