1 /*
   2  * Copyright (c) 1997, 2008, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 package javax.swing.text.rtf;
  26 
  27 import java.io.*;
  28 import java.lang.*;
  29 
  30 /**
  31  * <b>RTFParser</b> is a subclass of <b>AbstractFilter</b> which understands basic RTF syntax
  32  * and passes a stream of control words, text, and begin/end group
  33  * indications to its subclass.
  34  *
  35  * Normally programmers will only use <b>RTFFilter</b>, a subclass of this class that knows what to
  36  * do with the tokens this class parses.
  37  *
  38  * @see AbstractFilter
  39  * @see RTFFilter
  40  */
  41 abstract class RTFParser extends AbstractFilter
  42 {
  43   /** The current RTF group nesting level. */
  44   public int level;
  45 
  46   private int state;
  47   private StringBuffer currentCharacters;
  48   private String pendingKeyword;                // where keywords go while we
  49                                                 // read their parameters
  50   private int pendingCharacter;                 // for the \'xx construct
  51 
  52   private long binaryBytesLeft;                  // in a \bin blob?
  53   ByteArrayOutputStream binaryBuf;
  54   private boolean[] savedSpecials;
  55 
  56   /** A stream to which to write warnings and debugging information
  57    *  while parsing. This is set to <code>System.out</code> to log
  58    *  any anomalous information to stdout. */
  59   protected PrintStream warnings;
  60 
  61   // value for the 'state' variable
  62   private final int S_text = 0;          // reading random text
  63   private final int S_backslashed = 1;   // read a backslash, waiting for next
  64   private final int S_token = 2;         // reading a multicharacter token
  65   private final int S_parameter = 3;     // reading a token's parameter
  66 
  67   private final int S_aftertick = 4;     // after reading \'
  68   private final int S_aftertickc = 5;    // after reading \'x
  69 
  70   private final int S_inblob = 6;        // in a \bin blob
  71 
  72   /** Implemented by subclasses to interpret a parameter-less RTF keyword.
  73    *  The keyword is passed without the leading '/' or any delimiting
  74    *  whitespace. */
  75   public abstract boolean handleKeyword(String keyword);
  76   /** Implemented by subclasses to interpret a keyword with a parameter.
  77    *  @param keyword   The keyword, as with <code>handleKeyword(String)</code>.
  78    *  @param parameter The parameter following the keyword. */
  79   public abstract boolean handleKeyword(String keyword, int parameter);
  80   /** Implemented by subclasses to interpret text from the RTF stream. */
  81   public abstract void handleText(String text);
  82   public void handleText(char ch)
  83   { handleText(String.valueOf(ch)); }
  84   /** Implemented by subclasses to handle the contents of the \bin keyword. */
  85   public abstract void handleBinaryBlob(byte[] data);
  86   /** Implemented by subclasses to react to an increase
  87    *  in the nesting level. */
  88   public abstract void begingroup();
  89   /** Implemented by subclasses to react to the end of a group. */
  90   public abstract void endgroup();
  91 
  92   // table of non-text characters in rtf
  93   static final boolean rtfSpecialsTable[];
  94   static {
  95     rtfSpecialsTable = noSpecialsTable.clone();
  96     rtfSpecialsTable['\n'] = true;
  97     rtfSpecialsTable['\r'] = true;
  98     rtfSpecialsTable['{'] = true;
  99     rtfSpecialsTable['}'] = true;
 100     rtfSpecialsTable['\\'] = true;
 101   }
 102 
 103   public RTFParser()
 104   {
 105     currentCharacters = new StringBuffer();
 106     state = S_text;
 107     pendingKeyword = null;
 108     level = 0;
 109     //warnings = System.out;
 110 
 111     specialsTable = rtfSpecialsTable;
 112   }
 113 
 114   // TODO: Handle wrapup at end of file correctly.
 115 
 116   public void writeSpecial(int b)
 117     throws IOException
 118   {
 119     write((char)b);
 120   }
 121 
 122     protected void warning(String s) {
 123         if (warnings != null) {
 124             warnings.println(s);
 125         }
 126     }
 127 
 128   public void write(String s)
 129     throws IOException
 130   {
 131     if (state != S_text) {
 132       int index = 0;
 133       int length = s.length();
 134       while(index < length && state != S_text) {
 135         write(s.charAt(index));
 136         index ++;
 137       }
 138 
 139       if(index >= length)
 140         return;
 141 
 142       s = s.substring(index);
 143     }
 144 
 145     if (currentCharacters.length() > 0)
 146       currentCharacters.append(s);
 147     else
 148       handleText(s);
 149   }
 150 
 151   public void write(char ch)
 152     throws IOException
 153   {
 154     boolean ok;
 155 
 156     switch (state)
 157     {
 158       case S_text:
 159         if (ch == '\n' || ch == '\r') {
 160           break;  // unadorned newlines are ignored
 161         } else if (ch == '{') {
 162           if (currentCharacters.length() > 0) {
 163             handleText(currentCharacters.toString());
 164             currentCharacters = new StringBuffer();
 165           }
 166           level ++;
 167           begingroup();
 168         } else if(ch == '}') {
 169           if (currentCharacters.length() > 0) {
 170             handleText(currentCharacters.toString());
 171             currentCharacters = new StringBuffer();
 172           }
 173           if (level == 0)
 174             throw new IOException("Too many close-groups in RTF text");
 175           endgroup();
 176           level --;
 177         } else if(ch == '\\') {
 178           if (currentCharacters.length() > 0) {
 179             handleText(currentCharacters.toString());
 180             currentCharacters = new StringBuffer();
 181           }
 182           state = S_backslashed;
 183         } else {
 184           currentCharacters.append(ch);
 185         }
 186         break;
 187       case S_backslashed:
 188         if (ch == '\'') {
 189           state = S_aftertick;
 190           break;
 191         }
 192         if (!Character.isLetter(ch)) {
 193           char newstring[] = new char[1];
 194           newstring[0] = ch;
 195           if (!handleKeyword(new String(newstring))) {
 196             warning("Unknown keyword: " + newstring + " (" + (byte)ch + ")");
 197           }
 198           state = S_text;
 199           pendingKeyword = null;
 200           /* currentCharacters is already an empty stringBuffer */
 201           break;
 202         }
 203 
 204         state = S_token;
 205         /* FALL THROUGH */
 206       case S_token:
 207         if (Character.isLetter(ch)) {
 208           currentCharacters.append(ch);
 209         } else {
 210           pendingKeyword = currentCharacters.toString();
 211           currentCharacters = new StringBuffer();
 212 
 213           // Parameter following?
 214           if (Character.isDigit(ch) || (ch == '-')) {
 215             state = S_parameter;
 216             currentCharacters.append(ch);
 217           } else {
 218             ok = handleKeyword(pendingKeyword);
 219             if (!ok)
 220               warning("Unknown keyword: " + pendingKeyword);
 221             pendingKeyword = null;
 222             state = S_text;
 223 
 224             // Non-space delimiters get included in the text
 225             if (!Character.isWhitespace(ch))
 226               write(ch);
 227           }
 228         }
 229         break;
 230       case S_parameter:
 231         if (Character.isDigit(ch)) {
 232           currentCharacters.append(ch);
 233         } else {
 234           /* TODO: Test correct behavior of \bin keyword */
 235           if (pendingKeyword.equals("bin")) {  /* magic layer-breaking kwd */
 236             long parameter = Long.parseLong(currentCharacters.toString());
 237             pendingKeyword = null;
 238             state = S_inblob;
 239             binaryBytesLeft = parameter;
 240             if (binaryBytesLeft > Integer.MAX_VALUE)
 241                 binaryBuf = new ByteArrayOutputStream(Integer.MAX_VALUE);
 242             else
 243                 binaryBuf = new ByteArrayOutputStream((int)binaryBytesLeft);
 244             savedSpecials = specialsTable;
 245             specialsTable = allSpecialsTable;
 246             break;
 247           }
 248 
 249           int parameter = Integer.parseInt(currentCharacters.toString());
 250           ok = handleKeyword(pendingKeyword, parameter);
 251           if (!ok)
 252             warning("Unknown keyword: " + pendingKeyword +
 253                     " (param " + currentCharacters + ")");
 254           pendingKeyword = null;
 255           currentCharacters = new StringBuffer();
 256           state = S_text;
 257 
 258           // Delimiters here are interpreted as text too
 259           if (!Character.isWhitespace(ch))
 260             write(ch);
 261         }
 262         break;
 263       case S_aftertick:
 264         if (Character.digit(ch, 16) == -1)
 265           state = S_text;
 266         else {
 267           pendingCharacter = Character.digit(ch, 16);
 268           state = S_aftertickc;
 269         }
 270         break;
 271       case S_aftertickc:
 272         state = S_text;
 273         if (Character.digit(ch, 16) != -1)
 274         {
 275           pendingCharacter = pendingCharacter * 16 + Character.digit(ch, 16);
 276           ch = translationTable[pendingCharacter];
 277           if (ch != 0)
 278               handleText(ch);
 279         }
 280         break;
 281       case S_inblob:
 282         binaryBuf.write(ch);
 283         binaryBytesLeft --;
 284         if (binaryBytesLeft == 0) {
 285             state = S_text;
 286             specialsTable = savedSpecials;
 287             savedSpecials = null;
 288             handleBinaryBlob(binaryBuf.toByteArray());
 289             binaryBuf = null;
 290         }
 291       }
 292   }
 293 
 294   /** Flushes any buffered but not yet written characters.
 295    *  Subclasses which override this method should call this
 296    *  method <em>before</em> flushing
 297    *  any of their own buffers. */
 298   public void flush()
 299     throws IOException
 300   {
 301     super.flush();
 302 
 303     if (state == S_text && currentCharacters.length() > 0) {
 304       handleText(currentCharacters.toString());
 305       currentCharacters = new StringBuffer();
 306     }
 307   }
 308 
 309   /** Closes the parser. Currently, this simply does a <code>flush()</code>,
 310    *  followed by some minimal consistency checks. */
 311   public void close()
 312     throws IOException
 313   {
 314     flush();
 315 
 316     if (state != S_text || level > 0) {
 317       warning("Truncated RTF file.");
 318 
 319       /* TODO: any sane way to handle termination in a non-S_text state? */
 320       /* probably not */
 321 
 322       /* this will cause subclasses to behave more reasonably
 323          some of the time */
 324       while (level > 0) {
 325           endgroup();
 326           level --;
 327       }
 328     }
 329 
 330     super.close();
 331   }
 332 
 333 }