/* * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this * particular file as subject to the "Classpath" exception as provided * by Oracle in the LICENSE file that accompanied this code. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ package javax.swing.text.rtf; import java.lang.*; import java.util.*; import java.io.*; import java.awt.Color; import java.security.AccessController; import java.security.PrivilegedAction; import javax.swing.text.*; /** * Takes a sequence of RTF tokens and text and appends the text * described by the RTF to a StyledDocument (the target). * The RTF is lexed * from the character stream by the RTFParser which is this class's * superclass. * * This class is an indirect subclass of OutputStream. It must be closed * in order to guarantee that all of the text has been sent to * the text acceptor. * * @see RTFParser * @see java.io.OutputStream */ class RTFReader extends RTFParser { /** The object to which the parsed text is sent. */ StyledDocument target; /** Miscellaneous information about the parser's state. This * dictionary is saved and restored when an RTF group begins * or ends. */ Dictionary parserState; /* Current parser state */ /** This is the "dst" item from parserState. rtfDestination * is the current rtf destination. It is cached in an instance * variable for speed. */ Destination rtfDestination; /** This holds the current document attributes. */ MutableAttributeSet documentAttributes; /** This Dictionary maps Integer font numbers to String font names. */ Dictionary fontTable; /** This array maps color indices to Color objects. */ Color[] colorTable; /** This array maps character style numbers to Style objects. */ Style[] characterStyles; /** This array maps paragraph style numbers to Style objects. */ Style[] paragraphStyles; /** This array maps section style numbers to Style objects. */ Style[] sectionStyles; /** This is the RTF version number, extracted from the \rtf keyword. * The version information is currently not used. */ int rtfversion; /** true to indicate that if the next keyword is unknown, * the containing group should be ignored. */ boolean ignoreGroupIfUnknownKeyword; /** The parameter of the most recently parsed \\ucN keyword, * used for skipping alternative representations after a * Unicode character. */ int skippingCharacters; static private Dictionary straightforwardAttributes; static { straightforwardAttributes = RTFAttributes.attributesByKeyword(); } private MockAttributeSet mockery; /* this should be final, but there's a bug in javac... */ /** textKeywords maps RTF keywords to single-character strings, * for those keywords which simply insert some text. */ static Dictionary textKeywords = null; static { textKeywords = new Hashtable(); textKeywords.put("\\", "\\"); textKeywords.put("{", "{"); textKeywords.put("}", "}"); textKeywords.put(" ", "\u00A0"); /* not in the spec... */ textKeywords.put("~", "\u00A0"); /* nonbreaking space */ textKeywords.put("_", "\u2011"); /* nonbreaking hyphen */ textKeywords.put("bullet", "\u2022"); textKeywords.put("emdash", "\u2014"); textKeywords.put("emspace", "\u2003"); textKeywords.put("endash", "\u2013"); textKeywords.put("enspace", "\u2002"); textKeywords.put("ldblquote", "\u201C"); textKeywords.put("lquote", "\u2018"); textKeywords.put("ltrmark", "\u200E"); textKeywords.put("rdblquote", "\u201D"); textKeywords.put("rquote", "\u2019"); textKeywords.put("rtlmark", "\u200F"); textKeywords.put("tab", "\u0009"); textKeywords.put("zwj", "\u200D"); textKeywords.put("zwnj", "\u200C"); /* There is no Unicode equivalent to an optional hyphen, as far as I can tell. */ textKeywords.put("-", "\u2027"); /* TODO: optional hyphen */ } /* some entries in parserState */ static final String TabAlignmentKey = "tab_alignment"; static final String TabLeaderKey = "tab_leader"; static Dictionary characterSets; static boolean useNeXTForAnsi = false; static { characterSets = new Hashtable(); } /* TODO: per-font font encodings ( \fcharset control word ) ? */ /** * Creates a new RTFReader instance. Text will be sent to * the specified TextAcceptor. * * @param destination The TextAcceptor which is to receive the text. */ public RTFReader(StyledDocument destination) { int i; target = destination; parserState = new Hashtable(); fontTable = new Hashtable(); rtfversion = -1; mockery = new MockAttributeSet(); documentAttributes = new SimpleAttributeSet(); } /** Called when the RTFParser encounters a bin keyword in the * RTF stream. * * @see RTFParser */ public void handleBinaryBlob(byte[] data) { if (skippingCharacters > 0) { /* a blob only counts as one character for skipping purposes */ skippingCharacters --; return; } /* someday, someone will want to do something with blobs */ } /** * Handles any pure text (containing no control characters) in the input * stream. Called by the superclass. */ public void handleText(String text) { if (skippingCharacters > 0) { if (skippingCharacters >= text.length()) { skippingCharacters -= text.length(); return; } else { text = text.substring(skippingCharacters); skippingCharacters = 0; } } if (rtfDestination != null) { rtfDestination.handleText(text); return; } warning("Text with no destination. oops."); } /** The default color for text which has no specified color. */ Color defaultColor() { return Color.black; } /** Called by the superclass when a new RTF group is begun. * This implementation saves the current parserState, and gives * the current destination a chance to save its own state. * @see RTFParser#begingroup */ public void begingroup() { if (skippingCharacters > 0) { /* TODO this indicates an error in the RTF. Log it? */ skippingCharacters = 0; } /* we do this little dance to avoid cloning the entire state stack and immediately throwing it away. */ Object oldSaveState = parserState.get("_savedState"); if (oldSaveState != null) parserState.remove("_savedState"); @SuppressWarnings("unchecked") Dictionary saveState = (Dictionary)((Hashtable)parserState).clone(); if (oldSaveState != null) saveState.put("_savedState", oldSaveState); parserState.put("_savedState", saveState); if (rtfDestination != null) rtfDestination.begingroup(); } /** Called by the superclass when the current RTF group is closed. * This restores the parserState saved by begingroup() * as well as invoking the endgroup method of the current * destination. * @see RTFParser#endgroup */ public void endgroup() { if (skippingCharacters > 0) { /* NB this indicates an error in the RTF. Log it? */ skippingCharacters = 0; } @SuppressWarnings("unchecked") Dictionary restoredState = (Dictionary)parserState.get("_savedState"); Destination restoredDestination = (Destination)restoredState.get("dst"); if (restoredDestination != rtfDestination) { rtfDestination.close(); /* allow the destination to clean up */ rtfDestination = restoredDestination; } Dictionary oldParserState = parserState; parserState = restoredState; if (rtfDestination != null) rtfDestination.endgroup(oldParserState); } protected void setRTFDestination(Destination newDestination) { /* Check that setting the destination won't close the current destination (should never happen) */ @SuppressWarnings("unchecked") Dictionary previousState = (Dictionary)parserState.get("_savedState"); if (previousState != null) { if (rtfDestination != previousState.get("dst")) { warning("Warning, RTF destination overridden, invalid RTF."); rtfDestination.close(); } } rtfDestination = newDestination; parserState.put("dst", rtfDestination); } /** Called by the user when there is no more input (i.e., * at the end of the RTF file.) * * @see OutputStream#close */ public void close() throws IOException { Enumeration docProps = documentAttributes.getAttributeNames(); while(docProps.hasMoreElements()) { Object propName = docProps.nextElement(); target.putProperty(propName, documentAttributes.getAttribute(propName)); } /* RTFParser should have ensured that all our groups are closed */ warning("RTF filter done."); super.close(); } /** * Handles a parameterless RTF keyword. This is called by the superclass * (RTFParser) when a keyword is found in the input stream. * * @return true if the keyword is recognized and handled; * false otherwise * @see RTFParser#handleKeyword */ public boolean handleKeyword(String keyword) { String item; boolean ignoreGroupIfUnknownKeywordSave = ignoreGroupIfUnknownKeyword; if (skippingCharacters > 0) { skippingCharacters --; return true; } ignoreGroupIfUnknownKeyword = false; if ((item = textKeywords.get(keyword)) != null) { handleText(item); return true; } if (keyword.equals("fonttbl")) { setRTFDestination(new FonttblDestination()); return true; } if (keyword.equals("colortbl")) { setRTFDestination(new ColortblDestination()); return true; } if (keyword.equals("stylesheet")) { setRTFDestination(new StylesheetDestination()); return true; } if (keyword.equals("info")) { setRTFDestination(new InfoDestination()); return false; } if (keyword.equals("mac")) { setCharacterSet("mac"); return true; } if (keyword.equals("ansi")) { if (useNeXTForAnsi) setCharacterSet("NeXT"); else setCharacterSet("ansi"); return true; } if (keyword.equals("next")) { setCharacterSet("NeXT"); return true; } if (keyword.equals("pc")) { setCharacterSet("cpg437"); /* IBM Code Page 437 */ return true; } if (keyword.equals("pca")) { setCharacterSet("cpg850"); /* IBM Code Page 850 */ return true; } if (keyword.equals("*")) { ignoreGroupIfUnknownKeyword = true; return true; } if (rtfDestination != null) { if(rtfDestination.handleKeyword(keyword)) return true; } /* this point is reached only if the keyword is unrecognized */ /* other destinations we don't understand and therefore ignore */ if (keyword.equals("aftncn") || keyword.equals("aftnsep") || keyword.equals("aftnsepc") || keyword.equals("annotation") || keyword.equals("atnauthor") || keyword.equals("atnicn") || keyword.equals("atnid") || keyword.equals("atnref") || keyword.equals("atntime") || keyword.equals("atrfend") || keyword.equals("atrfstart") || keyword.equals("bkmkend") || keyword.equals("bkmkstart") || keyword.equals("datafield") || keyword.equals("do") || keyword.equals("dptxbxtext") || keyword.equals("falt") || keyword.equals("field") || keyword.equals("file") || keyword.equals("filetbl") || keyword.equals("fname") || keyword.equals("fontemb") || keyword.equals("fontfile") || keyword.equals("footer") || keyword.equals("footerf") || keyword.equals("footerl") || keyword.equals("footerr") || keyword.equals("footnote") || keyword.equals("ftncn") || keyword.equals("ftnsep") || keyword.equals("ftnsepc") || keyword.equals("header") || keyword.equals("headerf") || keyword.equals("headerl") || keyword.equals("headerr") || keyword.equals("keycode") || keyword.equals("nextfile") || keyword.equals("object") || keyword.equals("pict") || keyword.equals("pn") || keyword.equals("pnseclvl") || keyword.equals("pntxtb") || keyword.equals("pntxta") || keyword.equals("revtbl") || keyword.equals("rxe") || keyword.equals("tc") || keyword.equals("template") || keyword.equals("txe") || keyword.equals("xe")) { ignoreGroupIfUnknownKeywordSave = true; } if (ignoreGroupIfUnknownKeywordSave) { setRTFDestination(new DiscardingDestination()); } return false; } /** * Handles an RTF keyword and its integer parameter. * This is called by the superclass * (RTFParser) when a keyword is found in the input stream. * * @return true if the keyword is recognized and handled; * false otherwise * @see RTFParser#handleKeyword */ public boolean handleKeyword(String keyword, int parameter) { boolean ignoreGroupIfUnknownKeywordSave = ignoreGroupIfUnknownKeyword; if (skippingCharacters > 0) { skippingCharacters --; return true; } ignoreGroupIfUnknownKeyword = false; if (keyword.equals("uc")) { /* count of characters to skip after a unicode character */ parserState.put("UnicodeSkip", Integer.valueOf(parameter)); return true; } if (keyword.equals("u")) { if (parameter < 0) parameter = parameter + 65536; handleText((char)parameter); Number skip = (Number)(parserState.get("UnicodeSkip")); if (skip != null) { skippingCharacters = skip.intValue(); } else { skippingCharacters = 1; } return true; } if (keyword.equals("rtf")) { rtfversion = parameter; setRTFDestination(new DocumentDestination()); return true; } if (keyword.startsWith("NeXT") || keyword.equals("private")) ignoreGroupIfUnknownKeywordSave = true; if (rtfDestination != null) { if(rtfDestination.handleKeyword(keyword, parameter)) return true; } /* this point is reached only if the keyword is unrecognized */ if (ignoreGroupIfUnknownKeywordSave) { setRTFDestination(new DiscardingDestination()); } return false; } private void setTargetAttribute(String name, Object value) { // target.changeAttributes(new LFDictionary(LFArray.arrayWithObject(value), LFArray.arrayWithObject(name))); } /** * setCharacterSet sets the current translation table to correspond with * the named character set. The character set is loaded if necessary. * * @see AbstractFilter */ public void setCharacterSet(String name) { Object set; try { set = getCharacterSet(name); } catch (Exception e) { warning("Exception loading RTF character set \"" + name + "\": " + e); set = null; } if (set != null) { translationTable = (char[])set; } else { warning("Unknown RTF character set \"" + name + "\""); if (!name.equals("ansi")) { try { translationTable = (char[])getCharacterSet("ansi"); } catch (IOException e) { throw new InternalError("RTFReader: Unable to find character set resources (" + e + ")", e); } } } setTargetAttribute(Constants.RTFCharacterSet, name); } /** Adds a character set to the RTFReader's list * of known character sets */ public static void defineCharacterSet(String name, char[] table) { if (table.length < 256) throw new IllegalArgumentException("Translation table must have 256 entries."); characterSets.put(name, table); } /** Looks up a named character set. A character set is a 256-entry * array of characters, mapping unsigned byte values to their Unicode * equivalents. The character set is loaded if necessary. * * @return the character set */ public static Object getCharacterSet(final String name) throws IOException { char[] set = characterSets.get(name); if (set == null) { InputStream charsetStream = AccessController.doPrivileged( new PrivilegedAction() { public InputStream run() { return RTFReader.class.getResourceAsStream("charsets/" + name + ".txt"); } }); set = readCharset(charsetStream); defineCharacterSet(name, set); } return set; } /** Parses a character set from an InputStream. The character set * must contain 256 decimal integers, separated by whitespace, with * no punctuation. B- and C- style comments are allowed. * * @return the newly read character set */ static char[] readCharset(InputStream strm) throws IOException { char[] values = new char[256]; int i; StreamTokenizer in = new StreamTokenizer(new BufferedReader( new InputStreamReader(strm, "ISO-8859-1"))); in.eolIsSignificant(false); in.commentChar('#'); in.slashSlashComments(true); in.slashStarComments(true); i = 0; while (i < 256) { int ttype; try { ttype = in.nextToken(); } catch (Exception e) { throw new IOException("Unable to read from character set file (" + e + ")"); } if (ttype != StreamTokenizer.TT_NUMBER) { // System.out.println("Bad token: type=" + ttype + " tok=" + in.sval); throw new IOException("Unexpected token in character set file"); // continue; } values[i] = (char)(in.nval); i++; } return values; } static char[] readCharset(java.net.URL href) throws IOException { return readCharset(href.openStream()); } /** An interface (could be an entirely abstract class) describing * a destination. The RTF reader always has a current destination * which is where text is sent. * * @see RTFReader */ interface Destination { void handleBinaryBlob(byte[] data); void handleText(String text); boolean handleKeyword(String keyword); boolean handleKeyword(String keyword, int parameter); void begingroup(); void endgroup(Dictionary oldState); void close(); } /** This data-sink class is used to implement ignored destinations * (e.g. {\*\blegga blah blah blah} ) * It accepts all keywords and text but does nothing with them. */ class DiscardingDestination implements Destination { public void handleBinaryBlob(byte[] data) { /* Discard binary blobs. */ } public void handleText(String text) { /* Discard text. */ } public boolean handleKeyword(String text) { /* Accept and discard keywords. */ return true; } public boolean handleKeyword(String text, int parameter) { /* Accept and discard parameterized keywords. */ return true; } public void begingroup() { /* Ignore groups --- the RTFReader will keep track of the current group level as necessary */ } public void endgroup(Dictionary oldState) { /* Ignore groups */ } public void close() { /* No end-of-destination cleanup needed */ } } /** Reads the fonttbl group, inserting fonts into the RTFReader's * fontTable dictionary. */ class FonttblDestination implements Destination { int nextFontNumber; Integer fontNumberKey = null; String nextFontFamily; public void handleBinaryBlob(byte[] data) { /* Discard binary blobs. */ } public void handleText(String text) { int semicolon = text.indexOf(';'); String fontName; if (semicolon > -1) fontName = text.substring(0, semicolon); else fontName = text; /* TODO: do something with the font family. */ if (nextFontNumber == -1 && fontNumberKey != null) { //font name might be broken across multiple calls fontName = fontTable.get(fontNumberKey) + fontName; } else { fontNumberKey = Integer.valueOf(nextFontNumber); } fontTable.put(fontNumberKey, fontName); nextFontNumber = -1; nextFontFamily = null; } public boolean handleKeyword(String keyword) { if (keyword.charAt(0) == 'f') { nextFontFamily = keyword.substring(1); return true; } return false; } public boolean handleKeyword(String keyword, int parameter) { if (keyword.equals("f")) { nextFontNumber = parameter; return true; } return false; } /* Groups are irrelevant. */ public void begingroup() {} public void endgroup(Dictionary oldState) {} /* currently, the only thing we do when the font table ends is dump its contents to the debugging log. */ public void close() { Enumeration nums = fontTable.keys(); warning("Done reading font table."); while(nums.hasMoreElements()) { Integer num = nums.nextElement(); warning("Number " + num + ": " + fontTable.get(num)); } } } /** Reads the colortbl group. Upon end-of-group, the RTFReader's * color table is set to an array containing the read colors. */ class ColortblDestination implements Destination { int red, green, blue; Vector proTemTable; public ColortblDestination() { red = 0; green = 0; blue = 0; proTemTable = new Vector(); } public void handleText(String text) { int index; for (index = 0; index < text.length(); index ++) { if (text.charAt(index) == ';') { Color newColor; newColor = new Color(red, green, blue); proTemTable.addElement(newColor); } } } public void close() { int count = proTemTable.size(); warning("Done reading color table, " + count + " entries."); colorTable = new Color[count]; proTemTable.copyInto(colorTable); } public boolean handleKeyword(String keyword, int parameter) { if (keyword.equals("red")) red = parameter; else if (keyword.equals("green")) green = parameter; else if (keyword.equals("blue")) blue = parameter; else return false; return true; } /* Colortbls don't understand any parameterless keywords */ public boolean handleKeyword(String keyword) { return false; } /* Groups are irrelevant. */ public void begingroup() {} public void endgroup(Dictionary oldState) {} /* Shouldn't see any binary blobs ... */ public void handleBinaryBlob(byte[] data) {} } /** Handles the stylesheet keyword. Styles are read and sorted * into the three style arrays in the RTFReader. */ class StylesheetDestination extends DiscardingDestination implements Destination { Dictionary definedStyles; public StylesheetDestination() { definedStyles = new Hashtable(); } public void begingroup() { setRTFDestination(new StyleDefiningDestination()); } public void close() { Vector