1. /*
  2. * @(#)RTFParser.java 1.7 00/02/02
  3. *
  4. * Copyright 1997-2000 Sun Microsystems, Inc. All Rights Reserved.
  5. *
  6. * This software is the proprietary information of Sun Microsystems, Inc.
  7. * Use is subject to license terms.
  8. *
  9. */
  10. package javax.swing.text.rtf;
  11. import java.io.*;
  12. import java.lang.*;
  13. /**
  14. * <b>RTFParser</b> is a subclass of <b>AbstractFilter</b> which understands basic RTF syntax
  15. * and passes a stream of control words, text, and begin/end group
  16. * indications to its subclass.
  17. *
  18. * Normally programmers will only use <b>RTFFilter</b>, a subclass of this class that knows what to
  19. * do with the tokens this class parses.
  20. *
  21. * @see AbstractFilter
  22. * @see RTFFilter
  23. */
  24. abstract class RTFParser extends AbstractFilter
  25. {
  26. /** The current RTF group nesting level. */
  27. public int level;
  28. private int state;
  29. private StringBuffer currentCharacters;
  30. private String pendingKeyword; // where keywords go while we
  31. // read their parameters
  32. private int pendingCharacter; // for the \'xx construct
  33. private long binaryBytesLeft; // in a \bin blob?
  34. ByteArrayOutputStream binaryBuf;
  35. private boolean[] savedSpecials;
  36. /** A stream to which to write warnings and debugging information
  37. * while parsing. This is set to <code>System.out</code> to log
  38. * any anomalous information to stdout. */
  39. protected PrintStream warnings;
  40. // value for the 'state' variable
  41. private final int S_text = 0; // reading random text
  42. private final int S_backslashed = 1; // read a backslash, wating for next
  43. private final int S_token = 2; // reading a multicharacter token
  44. private final int S_parameter = 3; // reading a token's parameter
  45. private final int S_aftertick = 4; // after reading \'
  46. private final int S_aftertickc = 5; // after reading \'x
  47. private final int S_inblob = 6; // in a \bin blob
  48. /** Implemented by subclasses to interpret a paramater-less RTF keyword.
  49. * The keyword is passed without the leading '/' or any delimiting
  50. * whitespace. */
  51. public abstract boolean handleKeyword(String keyword);
  52. /** Implemented by subclasses to interpret a keyword with a parameter.
  53. * @param keyword The keyword, as with <code>handleKeyword(String)</code>.
  54. * @param parameter The parameter following the keyword. */
  55. public abstract boolean handleKeyword(String keyword, int parameter);
  56. /** Implemented by subclasses to interpret text from the RTF stream. */
  57. public abstract void handleText(String text);
  58. public void handleText(char ch)
  59. { handleText(String.valueOf(ch)); }
  60. /** Implemented by subclasses to handle the contents of the \bin keyword. */
  61. public abstract void handleBinaryBlob(byte[] data);
  62. /** Implemented by subclasses to react to an increase
  63. * in the nesting level. */
  64. public abstract void begingroup();
  65. /** Implemented by subclasses to react to the end of a group. */
  66. public abstract void endgroup();
  67. // table of non-text characters in rtf
  68. static final boolean rtfSpecialsTable[];
  69. static {
  70. rtfSpecialsTable = (boolean[])noSpecialsTable.clone();
  71. rtfSpecialsTable['\n'] = true;
  72. rtfSpecialsTable['\r'] = true;
  73. rtfSpecialsTable['{'] = true;
  74. rtfSpecialsTable['}'] = true;
  75. rtfSpecialsTable['\\'] = true;
  76. }
  77. public RTFParser()
  78. {
  79. currentCharacters = new StringBuffer();
  80. state = S_text;
  81. pendingKeyword = null;
  82. level = 0;
  83. //warnings = System.out;
  84. specialsTable = rtfSpecialsTable;
  85. }
  86. // TODO: Handle wrapup at end of file correctly.
  87. public void writeSpecial(int b)
  88. throws IOException
  89. {
  90. write((char)b);
  91. }
  92. protected void warning(String s) {
  93. if (warnings != null) {
  94. warnings.println(s);
  95. }
  96. }
  97. public void write(String s)
  98. throws IOException
  99. {
  100. if (state != S_text) {
  101. int index = 0;
  102. int length = s.length();
  103. while(index < length && state != S_text) {
  104. write(s.charAt(index));
  105. index ++;
  106. }
  107. if(index >= length)
  108. return;
  109. s = s.substring(index);
  110. }
  111. if (currentCharacters.length() > 0)
  112. currentCharacters.append(s);
  113. else
  114. handleText(s);
  115. }
  116. public void write(char ch)
  117. throws IOException
  118. {
  119. boolean ok;
  120. switch (state)
  121. {
  122. case S_text:
  123. if (ch == '\n' || ch == '\r') {
  124. break; // unadorned newlines are ignored
  125. } else if (ch == '{') {
  126. if (currentCharacters.length() > 0) {
  127. handleText(currentCharacters.toString());
  128. currentCharacters = new StringBuffer();
  129. }
  130. level ++;
  131. begingroup();
  132. } else if(ch == '}') {
  133. if (currentCharacters.length() > 0) {
  134. handleText(currentCharacters.toString());
  135. currentCharacters = new StringBuffer();
  136. }
  137. if (level == 0)
  138. throw new IOException("Too many close-groups in RTF text");
  139. endgroup();
  140. level --;
  141. } else if(ch == '\\') {
  142. if (currentCharacters.length() > 0) {
  143. handleText(currentCharacters.toString());
  144. currentCharacters = new StringBuffer();
  145. }
  146. state = S_backslashed;
  147. } else {
  148. currentCharacters.append(ch);
  149. }
  150. break;
  151. case S_backslashed:
  152. if (ch == '\'') {
  153. state = S_aftertick;
  154. break;
  155. }
  156. if (!Character.isLetter(ch)) {
  157. char newstring[] = new char[1];
  158. newstring[0] = ch;
  159. if (!handleKeyword(new String(newstring))) {
  160. warning("Unknown keyword: " + newstring + " (" + (byte)ch + ")");
  161. }
  162. state = S_text;
  163. pendingKeyword = null;
  164. /* currentCharacters is already an empty stringBuffer */
  165. break;
  166. }
  167. state = S_token;
  168. /* FALL THROUGH */
  169. case S_token:
  170. if (Character.isLetter(ch)) {
  171. currentCharacters.append(ch);
  172. } else {
  173. pendingKeyword = currentCharacters.toString();
  174. currentCharacters = new StringBuffer();
  175. // Parameter following?
  176. if (Character.isDigit(ch) || (ch == '-')) {
  177. state = S_parameter;
  178. currentCharacters.append(ch);
  179. } else {
  180. ok = handleKeyword(pendingKeyword);
  181. if (!ok)
  182. warning("Unknown keyword: " + pendingKeyword);
  183. pendingKeyword = null;
  184. state = S_text;
  185. // Non-space delimiters get included in the text
  186. if (!Character.isWhitespace(ch))
  187. write(ch);
  188. }
  189. }
  190. break;
  191. case S_parameter:
  192. if (Character.isDigit(ch)) {
  193. currentCharacters.append(ch);
  194. } else {
  195. /* TODO: Test correct behavior of \bin keyword */
  196. if (pendingKeyword.equals("bin")) { /* magic layer-breaking kwd */
  197. long parameter = Long.parseLong(currentCharacters.toString());
  198. pendingKeyword = null;
  199. state = S_inblob;
  200. binaryBytesLeft = parameter;
  201. if (binaryBytesLeft > Integer.MAX_VALUE)
  202. binaryBuf = new ByteArrayOutputStream(Integer.MAX_VALUE);
  203. else
  204. binaryBuf = new ByteArrayOutputStream((int)binaryBytesLeft);
  205. savedSpecials = specialsTable;
  206. specialsTable = allSpecialsTable;
  207. break;
  208. }
  209. int parameter = Integer.parseInt(currentCharacters.toString());
  210. ok = handleKeyword(pendingKeyword, parameter);
  211. if (!ok)
  212. warning("Unknown keyword: " + pendingKeyword +
  213. " (param " + currentCharacters + ")");
  214. pendingKeyword = null;
  215. currentCharacters = new StringBuffer();
  216. state = S_text;
  217. // Delimiters here are interpreted as text too
  218. if (!Character.isWhitespace(ch))
  219. write(ch);
  220. }
  221. break;
  222. case S_aftertick:
  223. if (Character.digit(ch, 16) == -1)
  224. state = S_text;
  225. else {
  226. pendingCharacter = Character.digit(ch, 16);
  227. state = S_aftertickc;
  228. }
  229. break;
  230. case S_aftertickc:
  231. state = S_text;
  232. if (Character.digit(ch, 16) != -1)
  233. {
  234. pendingCharacter = pendingCharacter * 16 + Character.digit(ch, 16);
  235. ch = translationTable[pendingCharacter];
  236. if (ch != 0)
  237. handleText(ch);
  238. }
  239. break;
  240. case S_inblob:
  241. binaryBuf.write(ch);
  242. binaryBytesLeft --;
  243. if (binaryBytesLeft == 0) {
  244. state = S_text;
  245. specialsTable = savedSpecials;
  246. savedSpecials = null;
  247. handleBinaryBlob(binaryBuf.toByteArray());
  248. binaryBuf = null;
  249. }
  250. }
  251. }
  252. /** Flushes any buffered but not yet written characters.
  253. * Subclasses which override this method should call this
  254. * method <em>before</em> flushing
  255. * any of their own buffers. */
  256. public void flush()
  257. throws IOException
  258. {
  259. super.flush();
  260. if (state == S_text && currentCharacters.length() > 0) {
  261. handleText(currentCharacters.toString());
  262. currentCharacters = new StringBuffer();
  263. }
  264. }
  265. /** Closes the parser. Currently, this simply does a <code>flush()</code>,
  266. * followed by some minimal consistency checks. */
  267. public void close()
  268. throws IOException
  269. {
  270. flush();
  271. if (state != S_text || level > 0) {
  272. warning("Truncated RTF file.");
  273. /* TODO: any sane way to handle termination in a non-S_text state? */
  274. /* probably not */
  275. /* this will cause subclasses to behave more reasonably
  276. some of the time */
  277. while (level > 0) {
  278. endgroup();
  279. level --;
  280. }
  281. }
  282. super.close();
  283. }
  284. }