1. /*
  2. * @(#)RTFParser.java 1.7 01/11/29
  3. *
  4. * Copyright 2002 Sun Microsystems, Inc. All rights reserved.
  5. * SUN PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
  6. */
  7. package javax.swing.text.rtf;
  8. import java.io.*;
  9. import java.lang.*;
  10. /**
  11. * <b>RTFParser</b> is a subclass of <b>AbstractFilter</b> which understands basic RTF syntax
  12. * and passes a stream of control words, text, and begin/end group
  13. * indications to its subclass.
  14. *
  15. * Normally programmers will only use <b>RTFFilter</b>, a subclass of this class that knows what to
  16. * do with the tokens this class parses.
  17. *
  18. * @see AbstractFilter
  19. * @see RTFFilter
  20. */
  21. abstract class RTFParser extends AbstractFilter
  22. {
  23. /** The current RTF group nesting level. */
  24. public int level;
  25. private int state;
  26. private StringBuffer currentCharacters;
  27. private String pendingKeyword; // where keywords go while we
  28. // read their parameters
  29. private int pendingCharacter; // for the \'xx construct
  30. private long binaryBytesLeft; // in a \bin blob?
  31. ByteArrayOutputStream binaryBuf;
  32. private boolean[] savedSpecials;
  33. /** A stream to which to write warnings and debugging information
  34. * while parsing. This is set to <code>System.out</code> to log
  35. * any anomalous information to stdout. */
  36. protected PrintStream warnings;
  37. // value for the 'state' variable
  38. private final int S_text = 0; // reading random text
  39. private final int S_backslashed = 1; // read a backslash, wating for next
  40. private final int S_token = 2; // reading a multicharacter token
  41. private final int S_parameter = 3; // reading a token's parameter
  42. private final int S_aftertick = 4; // after reading \'
  43. private final int S_aftertickc = 5; // after reading \'x
  44. private final int S_inblob = 6; // in a \bin blob
  45. /** Implemented by subclasses to interpret a paramater-less RTF keyword.
  46. * The keyword is passed without the leading '/' or any delimiting
  47. * whitespace. */
  48. public abstract boolean handleKeyword(String keyword);
  49. /** Implemented by subclasses to interpret a keyword with a parameter.
  50. * @param keyword The keyword, as with <code>handleKeyword(String)</code>.
  51. * @param parameter The parameter following the keyword. */
  52. public abstract boolean handleKeyword(String keyword, int parameter);
  53. /** Implemented by subclasses to interpret text from the RTF stream. */
  54. public abstract void handleText(String text);
  55. public void handleText(char ch)
  56. { handleText(String.valueOf(ch)); }
  57. /** Implemented by subclasses to handle the contents of the \bin keyword. */
  58. public abstract void handleBinaryBlob(byte[] data);
  59. /** Implemented by subclasses to react to an increase
  60. * in the nesting level. */
  61. public abstract void begingroup();
  62. /** Implemented by subclasses to react to the end of a group. */
  63. public abstract void endgroup();
  64. // table of non-text characters in rtf
  65. static final boolean rtfSpecialsTable[];
  66. static {
  67. rtfSpecialsTable = (boolean[])noSpecialsTable.clone();
  68. rtfSpecialsTable['\n'] = true;
  69. rtfSpecialsTable['\r'] = true;
  70. rtfSpecialsTable['{'] = true;
  71. rtfSpecialsTable['}'] = true;
  72. rtfSpecialsTable['\\'] = true;
  73. }
  74. public RTFParser()
  75. {
  76. currentCharacters = new StringBuffer();
  77. state = S_text;
  78. pendingKeyword = null;
  79. level = 0;
  80. //warnings = System.out;
  81. specialsTable = rtfSpecialsTable;
  82. }
  83. // TODO: Handle wrapup at end of file correctly.
  84. public void writeSpecial(int b)
  85. throws IOException
  86. {
  87. write((char)b);
  88. }
  89. protected void warning(String s) {
  90. if (warnings != null) {
  91. warnings.println(s);
  92. }
  93. }
  94. public void write(String s)
  95. throws IOException
  96. {
  97. if (state != S_text) {
  98. int index = 0;
  99. int length = s.length();
  100. while(index < length && state != S_text) {
  101. write(s.charAt(index));
  102. index ++;
  103. }
  104. if(index >= length)
  105. return;
  106. s = s.substring(index);
  107. }
  108. if (currentCharacters.length() > 0)
  109. currentCharacters.append(s);
  110. else
  111. handleText(s);
  112. }
  113. public void write(char ch)
  114. throws IOException
  115. {
  116. boolean ok;
  117. switch (state)
  118. {
  119. case S_text:
  120. if (ch == '\n' || ch == '\r') {
  121. break; // unadorned newlines are ignored
  122. } else if (ch == '{') {
  123. if (currentCharacters.length() > 0) {
  124. handleText(currentCharacters.toString());
  125. currentCharacters = new StringBuffer();
  126. }
  127. level ++;
  128. begingroup();
  129. } else if(ch == '}') {
  130. if (currentCharacters.length() > 0) {
  131. handleText(currentCharacters.toString());
  132. currentCharacters = new StringBuffer();
  133. }
  134. if (level == 0)
  135. throw new IOException("Too many close-groups in RTF text");
  136. endgroup();
  137. level --;
  138. } else if(ch == '\\') {
  139. if (currentCharacters.length() > 0) {
  140. handleText(currentCharacters.toString());
  141. currentCharacters = new StringBuffer();
  142. }
  143. state = S_backslashed;
  144. } else {
  145. currentCharacters.append(ch);
  146. }
  147. break;
  148. case S_backslashed:
  149. if (ch == '\'') {
  150. state = S_aftertick;
  151. break;
  152. }
  153. if (!Character.isLetter(ch)) {
  154. char newstring[] = new char[1];
  155. newstring[0] = ch;
  156. if (!handleKeyword(new String(newstring))) {
  157. warning("Unknown keyword: " + newstring + " (" + (byte)ch + ")");
  158. }
  159. state = S_text;
  160. pendingKeyword = null;
  161. /* currentCharacters is already an empty stringBuffer */
  162. break;
  163. }
  164. state = S_token;
  165. /* FALL THROUGH */
  166. case S_token:
  167. if (Character.isLetter(ch)) {
  168. currentCharacters.append(ch);
  169. } else {
  170. pendingKeyword = currentCharacters.toString();
  171. currentCharacters = new StringBuffer();
  172. // Parameter following?
  173. if (Character.isDigit(ch) || (ch == '-')) {
  174. state = S_parameter;
  175. currentCharacters.append(ch);
  176. } else {
  177. ok = handleKeyword(pendingKeyword);
  178. if (!ok)
  179. warning("Unknown keyword: " + pendingKeyword);
  180. pendingKeyword = null;
  181. state = S_text;
  182. // Non-space delimiters get included in the text
  183. if (!Character.isWhitespace(ch))
  184. write(ch);
  185. }
  186. }
  187. break;
  188. case S_parameter:
  189. if (Character.isDigit(ch)) {
  190. currentCharacters.append(ch);
  191. } else {
  192. /* TODO: Test correct behavior of \bin keyword */
  193. if (pendingKeyword.equals("bin")) { /* magic layer-breaking kwd */
  194. long parameter = Long.parseLong(currentCharacters.toString());
  195. pendingKeyword = null;
  196. state = S_inblob;
  197. binaryBytesLeft = parameter;
  198. if (binaryBytesLeft > Integer.MAX_VALUE)
  199. binaryBuf = new ByteArrayOutputStream(Integer.MAX_VALUE);
  200. else
  201. binaryBuf = new ByteArrayOutputStream((int)binaryBytesLeft);
  202. savedSpecials = specialsTable;
  203. specialsTable = allSpecialsTable;
  204. break;
  205. }
  206. int parameter = Integer.parseInt(currentCharacters.toString());
  207. ok = handleKeyword(pendingKeyword, parameter);
  208. if (!ok)
  209. warning("Unknown keyword: " + pendingKeyword +
  210. " (param " + currentCharacters + ")");
  211. pendingKeyword = null;
  212. currentCharacters = new StringBuffer();
  213. state = S_text;
  214. // Delimiters here are interpreted as text too
  215. if (!Character.isWhitespace(ch))
  216. write(ch);
  217. }
  218. break;
  219. case S_aftertick:
  220. if (Character.digit(ch, 16) == -1)
  221. state = S_text;
  222. else {
  223. pendingCharacter = Character.digit(ch, 16);
  224. state = S_aftertickc;
  225. }
  226. break;
  227. case S_aftertickc:
  228. state = S_text;
  229. if (Character.digit(ch, 16) != -1)
  230. {
  231. pendingCharacter = pendingCharacter * 16 + Character.digit(ch, 16);
  232. ch = translationTable[pendingCharacter];
  233. if (ch != 0)
  234. handleText(ch);
  235. }
  236. break;
  237. case S_inblob:
  238. binaryBuf.write(ch);
  239. binaryBytesLeft --;
  240. if (binaryBytesLeft == 0) {
  241. state = S_text;
  242. specialsTable = savedSpecials;
  243. savedSpecials = null;
  244. handleBinaryBlob(binaryBuf.toByteArray());
  245. binaryBuf = null;
  246. }
  247. }
  248. }
  249. /** Flushes any buffered but not yet written characters.
  250. * Subclasses which override this method should call this
  251. * method <em>before</em> flushing
  252. * any of their own buffers. */
  253. public void flush()
  254. throws IOException
  255. {
  256. super.flush();
  257. if (state == S_text && currentCharacters.length() > 0) {
  258. handleText(currentCharacters.toString());
  259. currentCharacters = new StringBuffer();
  260. }
  261. }
  262. /** Closes the parser. Currently, this simply does a <code>flush()</code>,
  263. * followed by some minimal consistency checks. */
  264. public void close()
  265. throws IOException
  266. {
  267. flush();
  268. if (state != S_text || level > 0) {
  269. warning("Truncated RTF file.");
  270. /* TODO: any sane way to handle termination in a non-S_text state? */
  271. /* probably not */
  272. /* this will cause subclasses to behave more reasonably
  273. some of the time */
  274. while (level > 0) {
  275. endgroup();
  276. level --;
  277. }
  278. }
  279. super.close();
  280. }
  281. }