1. /*
  2. * Copyright 2001-2004 The Apache Software Foundation
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. *
  16. */
  17. /*
  18. * This package is based on the work done by Keiron Liddle, Aftex Software
  19. * <keiron@aftexsw.com> to whom the Ant project is very grateful for his
  20. * great code.
  21. */
  22. package org.apache.tools.bzip2;
  23. import java.io.InputStream;
  24. import java.io.IOException;
  25. /**
  26. * An input stream that decompresses from the BZip2 format (without the file
  27. * header chars) to be read as any other stream.
  28. *
  29. */
  30. public class CBZip2InputStream extends InputStream implements BZip2Constants {
  31. private static void cadvise() {
  32. System.out.println("CRC Error");
  33. //throw new CCoruptionError();
  34. }
  35. private static void badBGLengths() {
  36. cadvise();
  37. }
  38. private static void bitStreamEOF() {
  39. cadvise();
  40. }
  41. private static void compressedStreamEOF() {
  42. cadvise();
  43. }
  44. private void makeMaps() {
  45. int i;
  46. nInUse = 0;
  47. for (i = 0; i < 256; i++) {
  48. if (inUse[i]) {
  49. seqToUnseq[nInUse] = (char) i;
  50. unseqToSeq[i] = (char) nInUse;
  51. nInUse++;
  52. }
  53. }
  54. }
  55. /*
  56. index of the last char in the block, so
  57. the block size == last + 1.
  58. */
  59. private int last;
  60. /*
  61. index in zptr[] of original string after sorting.
  62. */
  63. private int origPtr;
  64. /*
  65. always: in the range 0 .. 9.
  66. The current block size is 100000 * this number.
  67. */
  68. private int blockSize100k;
  69. private boolean blockRandomised;
  70. private int bsBuff;
  71. private int bsLive;
  72. private CRC mCrc = new CRC();
  73. private boolean[] inUse = new boolean[256];
  74. private int nInUse;
  75. private char[] seqToUnseq = new char[256];
  76. private char[] unseqToSeq = new char[256];
  77. private char[] selector = new char[MAX_SELECTORS];
  78. private char[] selectorMtf = new char[MAX_SELECTORS];
  79. private int[] tt;
  80. private char[] ll8;
  81. /*
  82. freq table collected to save a pass over the data
  83. during decompression.
  84. */
  85. private int[] unzftab = new int[256];
  86. private int[][] limit = new int[N_GROUPS][MAX_ALPHA_SIZE];
  87. private int[][] base = new int[N_GROUPS][MAX_ALPHA_SIZE];
  88. private int[][] perm = new int[N_GROUPS][MAX_ALPHA_SIZE];
  89. private int[] minLens = new int[N_GROUPS];
  90. private InputStream bsStream;
  91. private boolean streamEnd = false;
  92. private int currentChar = -1;
  93. private static final int START_BLOCK_STATE = 1;
  94. private static final int RAND_PART_A_STATE = 2;
  95. private static final int RAND_PART_B_STATE = 3;
  96. private static final int RAND_PART_C_STATE = 4;
  97. private static final int NO_RAND_PART_A_STATE = 5;
  98. private static final int NO_RAND_PART_B_STATE = 6;
  99. private static final int NO_RAND_PART_C_STATE = 7;
  100. private int currentState = START_BLOCK_STATE;
  101. private int storedBlockCRC, storedCombinedCRC;
  102. private int computedBlockCRC, computedCombinedCRC;
  103. int i2, count, chPrev, ch2;
  104. int i, tPos;
  105. int rNToGo = 0;
  106. int rTPos = 0;
  107. int j2;
  108. char z;
  109. public CBZip2InputStream(InputStream zStream) {
  110. ll8 = null;
  111. tt = null;
  112. bsSetStream(zStream);
  113. initialize();
  114. initBlock();
  115. setupBlock();
  116. }
  117. public int read() {
  118. if (streamEnd) {
  119. return -1;
  120. } else {
  121. int retChar = currentChar;
  122. switch(currentState) {
  123. case START_BLOCK_STATE:
  124. break;
  125. case RAND_PART_A_STATE:
  126. break;
  127. case RAND_PART_B_STATE:
  128. setupRandPartB();
  129. break;
  130. case RAND_PART_C_STATE:
  131. setupRandPartC();
  132. break;
  133. case NO_RAND_PART_A_STATE:
  134. break;
  135. case NO_RAND_PART_B_STATE:
  136. setupNoRandPartB();
  137. break;
  138. case NO_RAND_PART_C_STATE:
  139. setupNoRandPartC();
  140. break;
  141. default:
  142. break;
  143. }
  144. return retChar;
  145. }
  146. }
  147. private void initialize() {
  148. char magic3, magic4;
  149. magic3 = bsGetUChar();
  150. magic4 = bsGetUChar();
  151. if (magic3 != 'h' || magic4 < '1' || magic4 > '9') {
  152. bsFinishedWithStream();
  153. streamEnd = true;
  154. return;
  155. }
  156. setDecompressStructureSizes(magic4 - '0');
  157. computedCombinedCRC = 0;
  158. }
  159. private void initBlock() {
  160. char magic1, magic2, magic3, magic4;
  161. char magic5, magic6;
  162. magic1 = bsGetUChar();
  163. magic2 = bsGetUChar();
  164. magic3 = bsGetUChar();
  165. magic4 = bsGetUChar();
  166. magic5 = bsGetUChar();
  167. magic6 = bsGetUChar();
  168. if (magic1 == 0x17 && magic2 == 0x72 && magic3 == 0x45
  169. && magic4 == 0x38 && magic5 == 0x50 && magic6 == 0x90) {
  170. complete();
  171. return;
  172. }
  173. if (magic1 != 0x31 || magic2 != 0x41 || magic3 != 0x59
  174. || magic4 != 0x26 || magic5 != 0x53 || magic6 != 0x59) {
  175. badBlockHeader();
  176. streamEnd = true;
  177. return;
  178. }
  179. storedBlockCRC = bsGetInt32();
  180. if (bsR(1) == 1) {
  181. blockRandomised = true;
  182. } else {
  183. blockRandomised = false;
  184. }
  185. // currBlockNo++;
  186. getAndMoveToFrontDecode();
  187. mCrc.initialiseCRC();
  188. currentState = START_BLOCK_STATE;
  189. }
  190. private void endBlock() {
  191. computedBlockCRC = mCrc.getFinalCRC();
  192. /* A bad CRC is considered a fatal error. */
  193. if (storedBlockCRC != computedBlockCRC) {
  194. crcError();
  195. }
  196. computedCombinedCRC = (computedCombinedCRC << 1)
  197. | (computedCombinedCRC >>> 31);
  198. computedCombinedCRC ^= computedBlockCRC;
  199. }
  200. private void complete() {
  201. storedCombinedCRC = bsGetInt32();
  202. if (storedCombinedCRC != computedCombinedCRC) {
  203. crcError();
  204. }
  205. bsFinishedWithStream();
  206. streamEnd = true;
  207. }
  208. private static void blockOverrun() {
  209. cadvise();
  210. }
  211. private static void badBlockHeader() {
  212. cadvise();
  213. }
  214. private static void crcError() {
  215. cadvise();
  216. }
  217. private void bsFinishedWithStream() {
  218. try {
  219. if (this.bsStream != null) {
  220. if (this.bsStream != System.in) {
  221. this.bsStream.close();
  222. this.bsStream = null;
  223. }
  224. }
  225. } catch (IOException ioe) {
  226. //ignore
  227. }
  228. }
  229. private void bsSetStream(InputStream f) {
  230. bsStream = f;
  231. bsLive = 0;
  232. bsBuff = 0;
  233. }
  234. private int bsR(int n) {
  235. int v;
  236. while (bsLive < n) {
  237. int zzi;
  238. char thech = 0;
  239. try {
  240. thech = (char) bsStream.read();
  241. } catch (IOException e) {
  242. compressedStreamEOF();
  243. }
  244. if (thech == -1) {
  245. compressedStreamEOF();
  246. }
  247. zzi = thech;
  248. bsBuff = (bsBuff << 8) | (zzi & 0xff);
  249. bsLive += 8;
  250. }
  251. v = (bsBuff >> (bsLive - n)) & ((1 << n) - 1);
  252. bsLive -= n;
  253. return v;
  254. }
  255. private char bsGetUChar() {
  256. return (char) bsR(8);
  257. }
  258. private int bsGetint() {
  259. int u = 0;
  260. u = (u << 8) | bsR(8);
  261. u = (u << 8) | bsR(8);
  262. u = (u << 8) | bsR(8);
  263. u = (u << 8) | bsR(8);
  264. return u;
  265. }
  266. private int bsGetIntVS(int numBits) {
  267. return (int) bsR(numBits);
  268. }
  269. private int bsGetInt32() {
  270. return (int) bsGetint();
  271. }
  272. private void hbCreateDecodeTables(int[] limit, int[] base,
  273. int[] perm, char[] length,
  274. int minLen, int maxLen, int alphaSize) {
  275. int pp, i, j, vec;
  276. pp = 0;
  277. for (i = minLen; i <= maxLen; i++) {
  278. for (j = 0; j < alphaSize; j++) {
  279. if (length[j] == i) {
  280. perm[pp] = j;
  281. pp++;
  282. }
  283. }
  284. }
  285. for (i = 0; i < MAX_CODE_LEN; i++) {
  286. base[i] = 0;
  287. }
  288. for (i = 0; i < alphaSize; i++) {
  289. base[length[i] + 1]++;
  290. }
  291. for (i = 1; i < MAX_CODE_LEN; i++) {
  292. base[i] += base[i - 1];
  293. }
  294. for (i = 0; i < MAX_CODE_LEN; i++) {
  295. limit[i] = 0;
  296. }
  297. vec = 0;
  298. for (i = minLen; i <= maxLen; i++) {
  299. vec += (base[i + 1] - base[i]);
  300. limit[i] = vec - 1;
  301. vec <<= 1;
  302. }
  303. for (i = minLen + 1; i <= maxLen; i++) {
  304. base[i] = ((limit[i - 1] + 1) << 1) - base[i];
  305. }
  306. }
  307. private void recvDecodingTables() {
  308. char len[][] = new char[N_GROUPS][MAX_ALPHA_SIZE];
  309. int i, j, t, nGroups, nSelectors, alphaSize;
  310. int minLen, maxLen;
  311. boolean[] inUse16 = new boolean[16];
  312. /* Receive the mapping table */
  313. for (i = 0; i < 16; i++) {
  314. if (bsR(1) == 1) {
  315. inUse16[i] = true;
  316. } else {
  317. inUse16[i] = false;
  318. }
  319. }
  320. for (i = 0; i < 256; i++) {
  321. inUse[i] = false;
  322. }
  323. for (i = 0; i < 16; i++) {
  324. if (inUse16[i]) {
  325. for (j = 0; j < 16; j++) {
  326. if (bsR(1) == 1) {
  327. inUse[i * 16 + j] = true;
  328. }
  329. }
  330. }
  331. }
  332. makeMaps();
  333. alphaSize = nInUse + 2;
  334. /* Now the selectors */
  335. nGroups = bsR(3);
  336. nSelectors = bsR(15);
  337. for (i = 0; i < nSelectors; i++) {
  338. j = 0;
  339. while (bsR(1) == 1) {
  340. j++;
  341. }
  342. selectorMtf[i] = (char) j;
  343. }
  344. /* Undo the MTF values for the selectors. */
  345. {
  346. char[] pos = new char[N_GROUPS];
  347. char tmp, v;
  348. for (v = 0; v < nGroups; v++) {
  349. pos[v] = v;
  350. }
  351. for (i = 0; i < nSelectors; i++) {
  352. v = selectorMtf[i];
  353. tmp = pos[v];
  354. while (v > 0) {
  355. pos[v] = pos[v - 1];
  356. v--;
  357. }
  358. pos[0] = tmp;
  359. selector[i] = tmp;
  360. }
  361. }
  362. /* Now the coding tables */
  363. for (t = 0; t < nGroups; t++) {
  364. int curr = bsR(5);
  365. for (i = 0; i < alphaSize; i++) {
  366. while (bsR(1) == 1) {
  367. if (bsR(1) == 0) {
  368. curr++;
  369. } else {
  370. curr--;
  371. }
  372. }
  373. len[t][i] = (char) curr;
  374. }
  375. }
  376. /* Create the Huffman decoding tables */
  377. for (t = 0; t < nGroups; t++) {
  378. minLen = 32;
  379. maxLen = 0;
  380. for (i = 0; i < alphaSize; i++) {
  381. if (len[t][i] > maxLen) {
  382. maxLen = len[t][i];
  383. }
  384. if (len[t][i] < minLen) {
  385. minLen = len[t][i];
  386. }
  387. }
  388. hbCreateDecodeTables(limit[t], base[t], perm[t], len[t], minLen,
  389. maxLen, alphaSize);
  390. minLens[t] = minLen;
  391. }
  392. }
  393. private void getAndMoveToFrontDecode() {
  394. char[] yy = new char[256];
  395. int i, j, nextSym, limitLast;
  396. int EOB, groupNo, groupPos;
  397. limitLast = baseBlockSize * blockSize100k;
  398. origPtr = bsGetIntVS(24);
  399. recvDecodingTables();
  400. EOB = nInUse + 1;
  401. groupNo = -1;
  402. groupPos = 0;
  403. /*
  404. Setting up the unzftab entries here is not strictly
  405. necessary, but it does save having to do it later
  406. in a separate pass, and so saves a block's worth of
  407. cache misses.
  408. */
  409. for (i = 0; i <= 255; i++) {
  410. unzftab[i] = 0;
  411. }
  412. for (i = 0; i <= 255; i++) {
  413. yy[i] = (char) i;
  414. }
  415. last = -1;
  416. {
  417. int zt, zn, zvec, zj;
  418. if (groupPos == 0) {
  419. groupNo++;
  420. groupPos = G_SIZE;
  421. }
  422. groupPos--;
  423. zt = selector[groupNo];
  424. zn = minLens[zt];
  425. zvec = bsR(zn);
  426. while (zvec > limit[zt][zn]) {
  427. zn++;
  428. {
  429. {
  430. while (bsLive < 1) {
  431. int zzi;
  432. char thech = 0;
  433. try {
  434. thech = (char) bsStream.read();
  435. } catch (IOException e) {
  436. compressedStreamEOF();
  437. }
  438. if (thech == -1) {
  439. compressedStreamEOF();
  440. }
  441. zzi = thech;
  442. bsBuff = (bsBuff << 8) | (zzi & 0xff);
  443. bsLive += 8;
  444. }
  445. }
  446. zj = (bsBuff >> (bsLive - 1)) & 1;
  447. bsLive--;
  448. }
  449. zvec = (zvec << 1) | zj;
  450. }
  451. nextSym = perm[zt][zvec - base[zt][zn]];
  452. }
  453. while (true) {
  454. if (nextSym == EOB) {
  455. break;
  456. }
  457. if (nextSym == RUNA || nextSym == RUNB) {
  458. char ch;
  459. int s = -1;
  460. int N = 1;
  461. do {
  462. if (nextSym == RUNA) {
  463. s = s + (0 + 1) * N;
  464. } else if (nextSym == RUNB) {
  465. s = s + (1 + 1) * N;
  466. }
  467. N = N * 2;
  468. {
  469. int zt, zn, zvec, zj;
  470. if (groupPos == 0) {
  471. groupNo++;
  472. groupPos = G_SIZE;
  473. }
  474. groupPos--;
  475. zt = selector[groupNo];
  476. zn = minLens[zt];
  477. zvec = bsR(zn);
  478. while (zvec > limit[zt][zn]) {
  479. zn++;
  480. {
  481. {
  482. while (bsLive < 1) {
  483. int zzi;
  484. char thech = 0;
  485. try {
  486. thech = (char) bsStream.read();
  487. } catch (IOException e) {
  488. compressedStreamEOF();
  489. }
  490. if (thech == -1) {
  491. compressedStreamEOF();
  492. }
  493. zzi = thech;
  494. bsBuff = (bsBuff << 8) | (zzi & 0xff);
  495. bsLive += 8;
  496. }
  497. }
  498. zj = (bsBuff >> (bsLive - 1)) & 1;
  499. bsLive--;
  500. }
  501. zvec = (zvec << 1) | zj;
  502. }
  503. nextSym = perm[zt][zvec - base[zt][zn]];
  504. }
  505. } while (nextSym == RUNA || nextSym == RUNB);
  506. s++;
  507. ch = seqToUnseq[yy[0]];
  508. unzftab[ch] += s;
  509. while (s > 0) {
  510. last++;
  511. ll8[last] = ch;
  512. s--;
  513. }
  514. if (last >= limitLast) {
  515. blockOverrun();
  516. }
  517. continue;
  518. } else {
  519. char tmp;
  520. last++;
  521. if (last >= limitLast) {
  522. blockOverrun();
  523. }
  524. tmp = yy[nextSym - 1];
  525. unzftab[seqToUnseq[tmp]]++;
  526. ll8[last] = seqToUnseq[tmp];
  527. /*
  528. This loop is hammered during decompression,
  529. hence the unrolling.
  530. for (j = nextSym-1; j > 0; j--) yy[j] = yy[j-1];
  531. */
  532. j = nextSym - 1;
  533. for (; j > 3; j -= 4) {
  534. yy[j] = yy[j - 1];
  535. yy[j - 1] = yy[j - 2];
  536. yy[j - 2] = yy[j - 3];
  537. yy[j - 3] = yy[j - 4];
  538. }
  539. for (; j > 0; j--) {
  540. yy[j] = yy[j - 1];
  541. }
  542. yy[0] = tmp;
  543. {
  544. int zt, zn, zvec, zj;
  545. if (groupPos == 0) {
  546. groupNo++;
  547. groupPos = G_SIZE;
  548. }
  549. groupPos--;
  550. zt = selector[groupNo];
  551. zn = minLens[zt];
  552. zvec = bsR(zn);
  553. while (zvec > limit[zt][zn]) {
  554. zn++;
  555. {
  556. {
  557. while (bsLive < 1) {
  558. int zzi;
  559. char thech = 0;
  560. try {
  561. thech = (char) bsStream.read();
  562. } catch (IOException e) {
  563. compressedStreamEOF();
  564. }
  565. zzi = thech;
  566. bsBuff = (bsBuff << 8) | (zzi & 0xff);
  567. bsLive += 8;
  568. }
  569. }
  570. zj = (bsBuff >> (bsLive - 1)) & 1;
  571. bsLive--;
  572. }
  573. zvec = (zvec << 1) | zj;
  574. }
  575. nextSym = perm[zt][zvec - base[zt][zn]];
  576. }
  577. continue;
  578. }
  579. }
  580. }
  581. private void setupBlock() {
  582. int[] cftab = new int[257];
  583. char ch;
  584. cftab[0] = 0;
  585. for (i = 1; i <= 256; i++) {
  586. cftab[i] = unzftab[i - 1];
  587. }
  588. for (i = 1; i <= 256; i++) {
  589. cftab[i] += cftab[i - 1];
  590. }
  591. for (i = 0; i <= last; i++) {
  592. ch = (char) ll8[i];
  593. tt[cftab[ch]] = i;
  594. cftab[ch]++;
  595. }
  596. cftab = null;
  597. tPos = tt[origPtr];
  598. count = 0;
  599. i2 = 0;
  600. ch2 = 256; /* not a char and not EOF */
  601. if (blockRandomised) {
  602. rNToGo = 0;
  603. rTPos = 0;
  604. setupRandPartA();
  605. } else {
  606. setupNoRandPartA();
  607. }
  608. }
  609. private void setupRandPartA() {
  610. if (i2 <= last) {
  611. chPrev = ch2;
  612. ch2 = ll8[tPos];
  613. tPos = tt[tPos];
  614. if (rNToGo == 0) {
  615. rNToGo = rNums[rTPos];
  616. rTPos++;
  617. if (rTPos == 512) {
  618. rTPos = 0;
  619. }
  620. }
  621. rNToGo--;
  622. ch2 ^= (int) ((rNToGo == 1) ? 1 : 0);
  623. i2++;
  624. currentChar = ch2;
  625. currentState = RAND_PART_B_STATE;
  626. mCrc.updateCRC(ch2);
  627. } else {
  628. endBlock();
  629. initBlock();
  630. setupBlock();
  631. }
  632. }
  633. private void setupNoRandPartA() {
  634. if (i2 <= last) {
  635. chPrev = ch2;
  636. ch2 = ll8[tPos];
  637. tPos = tt[tPos];
  638. i2++;
  639. currentChar = ch2;
  640. currentState = NO_RAND_PART_B_STATE;
  641. mCrc.updateCRC(ch2);
  642. } else {
  643. endBlock();
  644. initBlock();
  645. setupBlock();
  646. }
  647. }
  648. private void setupRandPartB() {
  649. if (ch2 != chPrev) {
  650. currentState = RAND_PART_A_STATE;
  651. count = 1;
  652. setupRandPartA();
  653. } else {
  654. count++;
  655. if (count >= 4) {
  656. z = ll8[tPos];
  657. tPos = tt[tPos];
  658. if (rNToGo == 0) {
  659. rNToGo = rNums[rTPos];
  660. rTPos++;
  661. if (rTPos == 512) {
  662. rTPos = 0;
  663. }
  664. }
  665. rNToGo--;
  666. z ^= ((rNToGo == 1) ? 1 : 0);
  667. j2 = 0;
  668. currentState = RAND_PART_C_STATE;
  669. setupRandPartC();
  670. } else {
  671. currentState = RAND_PART_A_STATE;
  672. setupRandPartA();
  673. }
  674. }
  675. }
  676. private void setupRandPartC() {
  677. if (j2 < (int) z) {
  678. currentChar = ch2;
  679. mCrc.updateCRC(ch2);
  680. j2++;
  681. } else {
  682. currentState = RAND_PART_A_STATE;
  683. i2++;
  684. count = 0;
  685. setupRandPartA();
  686. }
  687. }
  688. private void setupNoRandPartB() {
  689. if (ch2 != chPrev) {
  690. currentState = NO_RAND_PART_A_STATE;
  691. count = 1;
  692. setupNoRandPartA();
  693. } else {
  694. count++;
  695. if (count >= 4) {
  696. z = ll8[tPos];
  697. tPos = tt[tPos];
  698. currentState = NO_RAND_PART_C_STATE;
  699. j2 = 0;
  700. setupNoRandPartC();
  701. } else {
  702. currentState = NO_RAND_PART_A_STATE;
  703. setupNoRandPartA();
  704. }
  705. }
  706. }
  707. private void setupNoRandPartC() {
  708. if (j2 < (int) z) {
  709. currentChar = ch2;
  710. mCrc.updateCRC(ch2);
  711. j2++;
  712. } else {
  713. currentState = NO_RAND_PART_A_STATE;
  714. i2++;
  715. count = 0;
  716. setupNoRandPartA();
  717. }
  718. }
  719. private void setDecompressStructureSizes(int newSize100k) {
  720. if (!(0 <= newSize100k && newSize100k <= 9 && 0 <= blockSize100k
  721. && blockSize100k <= 9)) {
  722. // throw new IOException("Invalid block size");
  723. }
  724. blockSize100k = newSize100k;
  725. if (newSize100k == 0) {
  726. return;
  727. }
  728. int n = baseBlockSize * newSize100k;
  729. ll8 = new char[n];
  730. tt = new int[n];
  731. }
  732. }