1. /*
  2. * Copyright 2001-2004 The Apache Software Foundation.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. package org.apache.commons.codec.binary;
  17. import org.apache.commons.codec.BinaryDecoder;
  18. import org.apache.commons.codec.BinaryEncoder;
  19. import org.apache.commons.codec.DecoderException;
  20. import org.apache.commons.codec.EncoderException;
  21. /**
  22. * Provides Base64 encoding and decoding as defined by RFC 2045.
  23. *
  24. * <p>This class implements section <cite>6.8. Base64 Content-Transfer-Encoding</cite>
  25. * from RFC 2045 <cite>Multipurpose Internet Mail Extensions (MIME) Part One:
  26. * Format of Internet Message Bodies</cite> by Freed and Borenstein.</p>
  27. *
  28. * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045</a>
  29. * @author Apache Software Foundation
  30. * @since 1.0-dev
  31. * @version $Id: Base64.java,v 1.20 2004/05/24 00:21:24 ggregory Exp $
  32. */
  33. public class Base64 implements BinaryEncoder, BinaryDecoder {
  34. /**
  35. * Chunk size per RFC 2045 section 6.8.
  36. *
  37. * <p>The {@value} character limit does not count the trailing CRLF, but counts
  38. * all other characters, including any equal signs.</p>
  39. *
  40. * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 6.8</a>
  41. */
  42. static final int CHUNK_SIZE = 76;
  43. /**
  44. * Chunk separator per RFC 2045 section 2.1.
  45. *
  46. * @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045 section 2.1</a>
  47. */
  48. static final byte[] CHUNK_SEPARATOR = "\r\n".getBytes();
  49. /**
  50. * The base length.
  51. */
  52. static final int BASELENGTH = 255;
  53. /**
  54. * Lookup length.
  55. */
  56. static final int LOOKUPLENGTH = 64;
  57. /**
  58. * Used to calculate the number of bits in a byte.
  59. */
  60. static final int EIGHTBIT = 8;
  61. /**
  62. * Used when encoding something which has fewer than 24 bits.
  63. */
  64. static final int SIXTEENBIT = 16;
  65. /**
  66. * Used to determine how many bits data contains.
  67. */
  68. static final int TWENTYFOURBITGROUP = 24;
  69. /**
  70. * Used to get the number of Quadruples.
  71. */
  72. static final int FOURBYTE = 4;
  73. /**
  74. * Used to test the sign of a byte.
  75. */
  76. static final int SIGN = -128;
  77. /**
  78. * Byte used to pad output.
  79. */
  80. static final byte PAD = (byte) '=';
  81. // Create arrays to hold the base64 characters and a
  82. // lookup for base64 chars
  83. private static byte[] base64Alphabet = new byte[BASELENGTH];
  84. private static byte[] lookUpBase64Alphabet = new byte[LOOKUPLENGTH];
  85. // Populating the lookup and character arrays
  86. static {
  87. for (int i = 0; i < BASELENGTH; i++) {
  88. base64Alphabet[i] = (byte) -1;
  89. }
  90. for (int i = 'Z'; i >= 'A'; i--) {
  91. base64Alphabet[i] = (byte) (i - 'A');
  92. }
  93. for (int i = 'z'; i >= 'a'; i--) {
  94. base64Alphabet[i] = (byte) (i - 'a' + 26);
  95. }
  96. for (int i = '9'; i >= '0'; i--) {
  97. base64Alphabet[i] = (byte) (i - '0' + 52);
  98. }
  99. base64Alphabet['+'] = 62;
  100. base64Alphabet['/'] = 63;
  101. for (int i = 0; i <= 25; i++) {
  102. lookUpBase64Alphabet[i] = (byte) ('A' + i);
  103. }
  104. for (int i = 26, j = 0; i <= 51; i++, j++) {
  105. lookUpBase64Alphabet[i] = (byte) ('a' + j);
  106. }
  107. for (int i = 52, j = 0; i <= 61; i++, j++) {
  108. lookUpBase64Alphabet[i] = (byte) ('0' + j);
  109. }
  110. lookUpBase64Alphabet[62] = (byte) '+';
  111. lookUpBase64Alphabet[63] = (byte) '/';
  112. }
  113. private static boolean isBase64(byte octect) {
  114. if (octect == PAD) {
  115. return true;
  116. } else if (base64Alphabet[octect] == -1) {
  117. return false;
  118. } else {
  119. return true;
  120. }
  121. }
  122. /**
  123. * Tests a given byte array to see if it contains
  124. * only valid characters within the Base64 alphabet.
  125. *
  126. * @param arrayOctect byte array to test
  127. * @return true if all bytes are valid characters in the Base64
  128. * alphabet or if the byte array is empty; false, otherwise
  129. */
  130. public static boolean isArrayByteBase64(byte[] arrayOctect) {
  131. arrayOctect = discardWhitespace(arrayOctect);
  132. int length = arrayOctect.length;
  133. if (length == 0) {
  134. // shouldn't a 0 length array be valid base64 data?
  135. // return false;
  136. return true;
  137. }
  138. for (int i = 0; i < length; i++) {
  139. if (!isBase64(arrayOctect[i])) {
  140. return false;
  141. }
  142. }
  143. return true;
  144. }
  145. /**
  146. * Encodes binary data using the base64 algorithm but
  147. * does not chunk the output.
  148. *
  149. * @param binaryData binary data to encode
  150. * @return Base64 characters
  151. */
  152. public static byte[] encodeBase64(byte[] binaryData) {
  153. return encodeBase64(binaryData, false);
  154. }
  155. /**
  156. * Encodes binary data using the base64 algorithm and chunks
  157. * the encoded output into 76 character blocks
  158. *
  159. * @param binaryData binary data to encode
  160. * @return Base64 characters chunked in 76 character blocks
  161. */
  162. public static byte[] encodeBase64Chunked(byte[] binaryData) {
  163. return encodeBase64(binaryData, true);
  164. }
  165. /**
  166. * Decodes an Object using the base64 algorithm. This method
  167. * is provided in order to satisfy the requirements of the
  168. * Decoder interface, and will throw a DecoderException if the
  169. * supplied object is not of type byte[].
  170. *
  171. * @param pObject Object to decode
  172. * @return An object (of type byte[]) containing the
  173. * binary data which corresponds to the byte[] supplied.
  174. * @throws DecoderException if the parameter supplied is not
  175. * of type byte[]
  176. */
  177. public Object decode(Object pObject) throws DecoderException {
  178. if (!(pObject instanceof byte[])) {
  179. throw new DecoderException("Parameter supplied to Base64 decode is not a byte[]");
  180. }
  181. return decode((byte[]) pObject);
  182. }
  183. /**
  184. * Decodes a byte[] containing containing
  185. * characters in the Base64 alphabet.
  186. *
  187. * @param pArray A byte array containing Base64 character data
  188. * @return a byte array containing binary data
  189. */
  190. public byte[] decode(byte[] pArray) {
  191. return decodeBase64(pArray);
  192. }
  193. /**
  194. * Encodes binary data using the base64 algorithm, optionally
  195. * chunking the output into 76 character blocks.
  196. *
  197. * @param binaryData Array containing binary data to encode.
  198. * @param isChunked if isChunked is true this encoder will chunk
  199. * the base64 output into 76 character blocks
  200. * @return Base64-encoded data.
  201. */
  202. public static byte[] encodeBase64(byte[] binaryData, boolean isChunked) {
  203. int lengthDataBits = binaryData.length * EIGHTBIT;
  204. int fewerThan24bits = lengthDataBits % TWENTYFOURBITGROUP;
  205. int numberTriplets = lengthDataBits / TWENTYFOURBITGROUP;
  206. byte encodedData[] = null;
  207. int encodedDataLength = 0;
  208. int nbrChunks = 0;
  209. if (fewerThan24bits != 0) {
  210. //data not divisible by 24 bit
  211. encodedDataLength = (numberTriplets + 1) * 4;
  212. } else {
  213. // 16 or 8 bit
  214. encodedDataLength = numberTriplets * 4;
  215. }
  216. // If the output is to be "chunked" into 76 character sections,
  217. // for compliance with RFC 2045 MIME, then it is important to
  218. // allow for extra length to account for the separator(s)
  219. if (isChunked) {
  220. nbrChunks =
  221. (CHUNK_SEPARATOR.length == 0 ? 0 : (int) Math.ceil((float) encodedDataLength / CHUNK_SIZE));
  222. encodedDataLength += nbrChunks * CHUNK_SEPARATOR.length;
  223. }
  224. encodedData = new byte[encodedDataLength];
  225. byte k = 0, l = 0, b1 = 0, b2 = 0, b3 = 0;
  226. int encodedIndex = 0;
  227. int dataIndex = 0;
  228. int i = 0;
  229. int nextSeparatorIndex = CHUNK_SIZE;
  230. int chunksSoFar = 0;
  231. //log.debug("number of triplets = " + numberTriplets);
  232. for (i = 0; i < numberTriplets; i++) {
  233. dataIndex = i * 3;
  234. b1 = binaryData[dataIndex];
  235. b2 = binaryData[dataIndex + 1];
  236. b3 = binaryData[dataIndex + 2];
  237. //log.debug("b1= " + b1 +", b2= " + b2 + ", b3= " + b3);
  238. l = (byte) (b2 & 0x0f);
  239. k = (byte) (b1 & 0x03);
  240. byte val1 =
  241. ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0);
  242. byte val2 =
  243. ((b2 & SIGN) == 0) ? (byte) (b2 >> 4) : (byte) ((b2) >> 4 ^ 0xf0);
  244. byte val3 =
  245. ((b3 & SIGN) == 0) ? (byte) (b3 >> 6) : (byte) ((b3) >> 6 ^ 0xfc);
  246. encodedData[encodedIndex] = lookUpBase64Alphabet[val1];
  247. //log.debug( "val2 = " + val2 );
  248. //log.debug( "k4 = " + (k<<4) );
  249. //log.debug( "vak = " + (val2 | (k<<4)) );
  250. encodedData[encodedIndex + 1] =
  251. lookUpBase64Alphabet[val2 | (k << 4)];
  252. encodedData[encodedIndex + 2] =
  253. lookUpBase64Alphabet[(l << 2) | val3];
  254. encodedData[encodedIndex + 3] = lookUpBase64Alphabet[b3 & 0x3f];
  255. encodedIndex += 4;
  256. // If we are chunking, let's put a chunk separator down.
  257. if (isChunked) {
  258. // this assumes that CHUNK_SIZE % 4 == 0
  259. if (encodedIndex == nextSeparatorIndex) {
  260. System.arraycopy(
  261. CHUNK_SEPARATOR,
  262. 0,
  263. encodedData,
  264. encodedIndex,
  265. CHUNK_SEPARATOR.length);
  266. chunksSoFar++;
  267. nextSeparatorIndex =
  268. (CHUNK_SIZE * (chunksSoFar + 1)) +
  269. (chunksSoFar * CHUNK_SEPARATOR.length);
  270. encodedIndex += CHUNK_SEPARATOR.length;
  271. }
  272. }
  273. }
  274. // form integral number of 6-bit groups
  275. dataIndex = i * 3;
  276. if (fewerThan24bits == EIGHTBIT) {
  277. b1 = binaryData[dataIndex];
  278. k = (byte) (b1 & 0x03);
  279. //log.debug("b1=" + b1);
  280. //log.debug("b1<<2 = " + (b1>>2) );
  281. byte val1 =
  282. ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0);
  283. encodedData[encodedIndex] = lookUpBase64Alphabet[val1];
  284. encodedData[encodedIndex + 1] = lookUpBase64Alphabet[k << 4];
  285. encodedData[encodedIndex + 2] = PAD;
  286. encodedData[encodedIndex + 3] = PAD;
  287. } else if (fewerThan24bits == SIXTEENBIT) {
  288. b1 = binaryData[dataIndex];
  289. b2 = binaryData[dataIndex + 1];
  290. l = (byte) (b2 & 0x0f);
  291. k = (byte) (b1 & 0x03);
  292. byte val1 =
  293. ((b1 & SIGN) == 0) ? (byte) (b1 >> 2) : (byte) ((b1) >> 2 ^ 0xc0);
  294. byte val2 =
  295. ((b2 & SIGN) == 0) ? (byte) (b2 >> 4) : (byte) ((b2) >> 4 ^ 0xf0);
  296. encodedData[encodedIndex] = lookUpBase64Alphabet[val1];
  297. encodedData[encodedIndex + 1] =
  298. lookUpBase64Alphabet[val2 | (k << 4)];
  299. encodedData[encodedIndex + 2] = lookUpBase64Alphabet[l << 2];
  300. encodedData[encodedIndex + 3] = PAD;
  301. }
  302. if (isChunked) {
  303. // we also add a separator to the end of the final chunk.
  304. if (chunksSoFar < nbrChunks) {
  305. System.arraycopy(
  306. CHUNK_SEPARATOR,
  307. 0,
  308. encodedData,
  309. encodedDataLength - CHUNK_SEPARATOR.length,
  310. CHUNK_SEPARATOR.length);
  311. }
  312. }
  313. return encodedData;
  314. }
  315. /**
  316. * Decodes Base64 data into octects
  317. *
  318. * @param base64Data Byte array containing Base64 data
  319. * @return Array containing decoded data.
  320. */
  321. public static byte[] decodeBase64(byte[] base64Data) {
  322. // RFC 2045 requires that we discard ALL non-Base64 characters
  323. base64Data = discardNonBase64(base64Data);
  324. // handle the edge case, so we don't have to worry about it later
  325. if (base64Data.length == 0) {
  326. return new byte[0];
  327. }
  328. int numberQuadruple = base64Data.length / FOURBYTE;
  329. byte decodedData[] = null;
  330. byte b1 = 0, b2 = 0, b3 = 0, b4 = 0, marker0 = 0, marker1 = 0;
  331. // Throw away anything not in base64Data
  332. int encodedIndex = 0;
  333. int dataIndex = 0;
  334. {
  335. // this sizes the output array properly - rlw
  336. int lastData = base64Data.length;
  337. // ignore the '=' padding
  338. while (base64Data[lastData - 1] == PAD) {
  339. if (--lastData == 0) {
  340. return new byte[0];
  341. }
  342. }
  343. decodedData = new byte[lastData - numberQuadruple];
  344. }
  345. for (int i = 0; i < numberQuadruple; i++) {
  346. dataIndex = i * 4;
  347. marker0 = base64Data[dataIndex + 2];
  348. marker1 = base64Data[dataIndex + 3];
  349. b1 = base64Alphabet[base64Data[dataIndex]];
  350. b2 = base64Alphabet[base64Data[dataIndex + 1]];
  351. if (marker0 != PAD && marker1 != PAD) {
  352. //No PAD e.g 3cQl
  353. b3 = base64Alphabet[marker0];
  354. b4 = base64Alphabet[marker1];
  355. decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);
  356. decodedData[encodedIndex + 1] =
  357. (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf));
  358. decodedData[encodedIndex + 2] = (byte) (b3 << 6 | b4);
  359. } else if (marker0 == PAD) {
  360. //Two PAD e.g. 3c[Pad][Pad]
  361. decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);
  362. } else if (marker1 == PAD) {
  363. //One PAD e.g. 3cQ[Pad]
  364. b3 = base64Alphabet[marker0];
  365. decodedData[encodedIndex] = (byte) (b1 << 2 | b2 >> 4);
  366. decodedData[encodedIndex + 1] =
  367. (byte) (((b2 & 0xf) << 4) | ((b3 >> 2) & 0xf));
  368. }
  369. encodedIndex += 3;
  370. }
  371. return decodedData;
  372. }
  373. /**
  374. * Discards any whitespace from a base-64 encoded block.
  375. *
  376. * @param data The base-64 encoded data to discard the whitespace
  377. * from.
  378. * @return The data, less whitespace (see RFC 2045).
  379. */
  380. static byte[] discardWhitespace(byte[] data) {
  381. byte groomedData[] = new byte[data.length];
  382. int bytesCopied = 0;
  383. for (int i = 0; i < data.length; i++) {
  384. switch (data[i]) {
  385. case (byte) ' ' :
  386. case (byte) '\n' :
  387. case (byte) '\r' :
  388. case (byte) '\t' :
  389. break;
  390. default:
  391. groomedData[bytesCopied++] = data[i];
  392. }
  393. }
  394. byte packedData[] = new byte[bytesCopied];
  395. System.arraycopy(groomedData, 0, packedData, 0, bytesCopied);
  396. return packedData;
  397. }
  398. /**
  399. * Discards any characters outside of the base64 alphabet, per
  400. * the requirements on page 25 of RFC 2045 - "Any characters
  401. * outside of the base64 alphabet are to be ignored in base64
  402. * encoded data."
  403. *
  404. * @param data The base-64 encoded data to groom
  405. * @return The data, less non-base64 characters (see RFC 2045).
  406. */
  407. static byte[] discardNonBase64(byte[] data) {
  408. byte groomedData[] = new byte[data.length];
  409. int bytesCopied = 0;
  410. for (int i = 0; i < data.length; i++) {
  411. if (isBase64(data[i])) {
  412. groomedData[bytesCopied++] = data[i];
  413. }
  414. }
  415. byte packedData[] = new byte[bytesCopied];
  416. System.arraycopy(groomedData, 0, packedData, 0, bytesCopied);
  417. return packedData;
  418. }
  419. // Implementation of the Encoder Interface
  420. /**
  421. * Encodes an Object using the base64 algorithm. This method
  422. * is provided in order to satisfy the requirements of the
  423. * Encoder interface, and will throw an EncoderException if the
  424. * supplied object is not of type byte[].
  425. *
  426. * @param pObject Object to encode
  427. * @return An object (of type byte[]) containing the
  428. * base64 encoded data which corresponds to the byte[] supplied.
  429. * @throws EncoderException if the parameter supplied is not
  430. * of type byte[]
  431. */
  432. public Object encode(Object pObject) throws EncoderException {
  433. if (!(pObject instanceof byte[])) {
  434. throw new EncoderException(
  435. "Parameter supplied to Base64 encode is not a byte[]");
  436. }
  437. return encode((byte[]) pObject);
  438. }
  439. /**
  440. * Encodes a byte[] containing binary data, into a byte[] containing
  441. * characters in the Base64 alphabet.
  442. *
  443. * @param pArray a byte array containing binary data
  444. * @return A byte array containing only Base64 character data
  445. */
  446. public byte[] encode(byte[] pArray) {
  447. return encodeBase64(pArray, false);
  448. }
  449. }