src/com/sun/org/apache/xml/internal/serializer/CharInfo.java

Print this page

        

*** 53,63 **** * @xsl.usage internal */ final class CharInfo { /** Given a character, lookup a String to output (e.g. a decorated entity reference). */ ! private HashMap m_charToString; /** * The name of the HTML entities file. * If specified, the file will be resource loaded with the default class loader. */ --- 53,63 ---- * @xsl.usage internal */ final class CharInfo { /** Given a character, lookup a String to output (e.g. a decorated entity reference). */ ! private HashMap m_charToString = new HashMap(); /** * The name of the HTML entities file. * If specified, the file will be resource loaded with the default class loader. */
*** 70,123 **** */ public static final String XML_ENTITIES_RESOURCE = "com.sun.org.apache.xml.internal.serializer.XMLEntities"; /** The horizontal tab character, which the parser should always normalize. */ ! static final char S_HORIZONAL_TAB = 0x09; /** The linefeed character, which the parser should always normalize. */ ! static final char S_LINEFEED = 0x0A; /** The carriage return character, which the parser should always normalize. */ ! static final char S_CARRIAGERETURN = 0x0D; ! static final char S_SPACE = 0x20; ! static final char S_QUOTE = 0x22; ! static final char S_LT = 0x3C; ! static final char S_GT = 0x3E; ! static final char S_NEL = 0x85; ! static final char S_LINE_SEPARATOR = 0x2028; /** This flag is an optimization for HTML entities. It false if entities * other than quot (34), amp (38), lt (60) and gt (62) are defined * in the range 0 to 127. * @xsl.usage internal */ ! boolean onlyQuotAmpLtGt; /** Copy the first 0,1 ... ASCII_MAX values into an array */ ! static final int ASCII_MAX = 128; /** Array of values is faster access than a set of bits ! * to quickly check ASCII characters in attribute values, ! * the value is true if the character in an attribute value ! * should be mapped to a String. */ ! private final boolean[] shouldMapAttrChar_ASCII; /** Array of values is faster access than a set of bits ! * to quickly check ASCII characters in text nodes, ! * the value is true if the character in a text node ! * should be mapped to a String. */ ! private final boolean[] shouldMapTextChar_ASCII; /** An array of bits to record if the character is in the set. * Although information in this array is complete, the * isSpecialAttrASCII array is used first because access to its values * is common and faster. */ ! private final int array_of_bits[]; // 5 for 32 bit words, 6 for 64 bit words ... 
/* * This constant is used to shift an integer to quickly --- 70,115 ---- */ public static final String XML_ENTITIES_RESOURCE = "com.sun.org.apache.xml.internal.serializer.XMLEntities"; /** The horizontal tab character, which the parser should always normalize. */ ! public static final char S_HORIZONAL_TAB = 0x09; /** The linefeed character, which the parser should always normalize. */ ! public static final char S_LINEFEED = 0x0A; /** The carriage return character, which the parser should always normalize. */ ! public static final char S_CARRIAGERETURN = 0x0D; /** This flag is an optimization for HTML entities. It false if entities * other than quot (34), amp (38), lt (60) and gt (62) are defined * in the range 0 to 127. * @xsl.usage internal */ ! final boolean onlyQuotAmpLtGt; /** Copy the first 0,1 ... ASCII_MAX values into an array */ ! private static final int ASCII_MAX = 128; /** Array of values is faster access than a set of bits ! * to quickly check ASCII characters in attribute values. */ ! private boolean[] isSpecialAttrASCII = new boolean[ASCII_MAX]; /** Array of values is faster access than a set of bits ! * to quickly check ASCII characters in text nodes. */ ! private boolean[] isSpecialTextASCII = new boolean[ASCII_MAX]; ! ! private boolean[] isCleanTextASCII = new boolean[ASCII_MAX]; /** An array of bits to record if the character is in the set. * Although information in this array is complete, the * isSpecialAttrASCII array is used first because access to its values * is common and faster. */ ! private int array_of_bits[] = createEmptySetOfIntegers(65535); // 5 for 32 bit words, 6 for 64 bit words ... /* * This constant is used to shift an integer to quickly
*** 144,185 **** */ private int firstWordNotUsed; /** ! * A base constructor just to explicitly create the fields, ! * with the exception of m_charToString which is handled ! * by the constructor that delegates base construction to this one. ! * <p> ! * m_charToString is not created here only for performance reasons, ! * to avoid creating a Hashtable that will be replaced when ! * making a mutable copy, {@link #mutableCopyOf(CharInfo)}. * */ ! private CharInfo() { ! this.array_of_bits = createEmptySetOfIntegers(65535); ! this.firstWordNotUsed = 0; ! this.shouldMapAttrChar_ASCII = new boolean[ASCII_MAX]; ! this.shouldMapTextChar_ASCII = new boolean[ASCII_MAX]; ! this.m_charKey = new CharKey(); ! ! // Not set here, but in a constructor that uses this one ! // this.m_charToString = new Hashtable(); ! ! this.onlyQuotAmpLtGt = true; ! ! ! return; } private CharInfo(String entitiesResource, String method, boolean internal) { - // call the default constructor to create the fields - this(); - m_charToString = new HashMap(); - ResourceBundle entities = null; boolean noExtraEntities = true; // Make various attempts to interpret the parameter as a properties // file or resource file, as follows: --- 136,172 ---- */ private int firstWordNotUsed; /** ! * Constructor that reads in a resource file that describes the mapping of ! * characters to entity references. ! * This constructor is private, just to force the use ! * of the getCharInfo(entitiesResource) factory ! * ! * Resource files must be encoded in UTF-8 and can either be properties ! * files with a .properties extension assumed. Alternatively, they can ! * have the following form, with no particular extension assumed: ! * ! * <pre> ! * # First char # is a comment ! * Entity numericValue ! * quot 34 ! * amp 38 ! * </pre> * + * @param entitiesResource Name of properties or resource file that should + * be loaded, which describes that mapping of characters to entity + * references. */ ! 
private CharInfo(String entitiesResource, String method) { ! this(entitiesResource, method, false); } private CharInfo(String entitiesResource, String method, boolean internal) { ResourceBundle entities = null; boolean noExtraEntities = true; // Make various attempts to interpret the parameter as a properties // file or resource file, as follows:
*** 201,214 **** Enumeration keys = entities.getKeys(); while (keys.hasMoreElements()){ String name = (String) keys.nextElement(); String value = entities.getString(name); int code = Integer.parseInt(value); ! boolean extra = defineEntity(name, (char) code); ! if (extra) noExtraEntities = false; } } else { InputStream is = null; // Load user specified resource file by using URL loading, it // requires a valid URI as parameter --- 188,203 ---- Enumeration keys = entities.getKeys(); while (keys.hasMoreElements()){ String name = (String) keys.nextElement(); String value = entities.getString(name); int code = Integer.parseInt(value); ! defineEntity(name, (char) code); ! if (extraEntity(code)) noExtraEntities = false; } + set(S_LINEFEED); + set(S_CARRIAGERETURN); } else { InputStream is = null; // Load user specified resource file by using URL loading, it // requires a valid URI as parameter
*** 288,307 **** value = value.substring(0, index); } int code = Integer.parseInt(value); ! boolean extra = defineEntity(name, (char) code); ! if (extra) noExtraEntities = false; } } line = reader.readLine(); } is.close(); } catch (Exception e) { throw new RuntimeException( Utils.messages.createMessage( MsgKey.ER_RESOURCE_COULD_NOT_LOAD, new Object[] { entitiesResource, --- 277,298 ---- value = value.substring(0, index); } int code = Integer.parseInt(value); ! defineEntity(name, (char) code); ! if (extraEntity(code)) noExtraEntities = false; } } line = reader.readLine(); } is.close(); + set(S_LINEFEED); + set(S_CARRIAGERETURN); } catch (Exception e) { throw new RuntimeException( Utils.messages.createMessage( MsgKey.ER_RESOURCE_COULD_NOT_LOAD, new Object[] { entitiesResource,
*** 315,326 **** --- 306,340 ---- } catch (Exception except) {} } } } + /* initialize the array isCleanTextASCII[] with a cache of values + * for use by ToStream.character(char[], int , int) + * and the array isSpecialTextASCII[] with the opposite values + * (all in the name of performance!) + */ + for (int ch = 0; ch <ASCII_MAX; ch++) + if((((0x20 <= ch || (0x0A == ch || 0x0D == ch || 0x09 == ch))) + && (!get(ch))) || ('"' == ch)) + { + isCleanTextASCII[ch] = true; + isSpecialTextASCII[ch] = false; + } + else { + isCleanTextASCII[ch] = false; + isSpecialTextASCII[ch] = true; + } + + + onlyQuotAmpLtGt = noExtraEntities; + // initialize the array with a cache of the BitSet values + for (int i=0; i<ASCII_MAX; i++) + isSpecialAttrASCII[i] = get(i); + /* Now that we've used get(ch) just above to initialize the * two arrays we will change by adding a tab to the set of * special chars for XML (but not HTML!). * We do this because a tab is always a * special character in an XML attribute,
*** 328,390 **** * if it has an entity defined for it. * This is the reason for this delay. */ if (Method.XML.equals(method)) { ! // We choose not to escape the quotation mark as &quot; in text nodes ! shouldMapTextChar_ASCII[S_QUOTE] = false; ! } ! ! if (Method.HTML.equals(method)) { ! // The XSLT 1.0 recommendation says ! // "The html output method should not escape < characters occurring in attribute values." ! // So we don't escape '<' in an attribute for HTML ! shouldMapAttrChar_ASCII['<'] = false; ! ! // We choose not to escape the quotation mark as &quot; in text nodes. ! shouldMapTextChar_ASCII[S_QUOTE] = false; } } /** * Defines a new character reference. The reference's name and value are * supplied. Nothing happens if the character reference is already defined. * <p>Unlike internal entities, character references are a string to single * character mapping. They are used to map non-ASCII characters both on ! * parsing and printing, primarily for HTML documents. '&amp;lt;' is an * example of a character reference.</p> * * @param name The entity's name * @param value The entity's value - * @return true if the mapping is not one of: - * <ul> - * <li> '<' to "&lt;" - * <li> '>' to "&gt;" - * <li> '&' to "&amp;" - * <li> '"' to "&quot;" - * </ul> */ ! private boolean defineEntity(String name, char value) { StringBuilder sb = new StringBuilder("&"); sb.append(name); sb.append(';'); String entityString = sb.toString(); ! boolean extra = defineChar2StringMapping(entityString, value); ! return extra; } /** - * A utility object, just used to map characters to output Strings, - * needed because a HashMap needs to map an object as a key, not a - * Java primitive type, like a char, so this object gets around that - * and it is reusable. - */ - private final CharKey m_charKey; - - /** * Map a character to a String. For example given * the character '>' this method would return the fully decorated * entity name "&lt;". 
* Strings for entity references are loaded from a properties file, * but additional mappings defined through calls to defineChar2String() --- 342,377 ---- * if it has an entity defined for it. * This is the reason for this delay. */ if (Method.XML.equals(method)) { ! isSpecialAttrASCII[S_HORIZONAL_TAB] = true; } } /** * Defines a new character reference. The reference's name and value are * supplied. Nothing happens if the character reference is already defined. * <p>Unlike internal entities, character references are a string to single * character mapping. They are used to map non-ASCII characters both on ! * parsing and printing, primarily for HTML documents. '&lt;amp;' is an * example of a character reference.</p> * * @param name The entity's name * @param value The entity's value */ ! private void defineEntity(String name, char value) { StringBuilder sb = new StringBuilder("&"); sb.append(name); sb.append(';'); String entityString = sb.toString(); ! defineChar2StringMapping(entityString, value); } /** * Map a character to a String. For example given * the character '>' this method would return the fully decorated * entity name "&lt;". * Strings for entity references are loaded from a properties file, * but additional mappings defined through calls to defineChar2String()
*** 411,464 **** return (String) m_charToString.get(charKey); } /** * Tell if the character argument that is from ! * an attribute value has a mapping to a String. * * @param value the value of a character that is in an attribute value * @return true if the character should have any special treatment, * such as when writing out attribute values, ! * such as when writing out entity references. * @xsl.usage internal */ ! final boolean shouldMapAttrChar(int value) { // for performance try the values in the boolean array first, // this is faster access than the BitSet for common ASCII values if (value < ASCII_MAX) ! return shouldMapAttrChar_ASCII[value]; // rather than java.util.BitSet, our private // implementation is faster (and less general). return get(value); } /** * Tell if the character argument that is from a ! * text node has a mapping to a String, for example ! * to map '<' to "&lt;". * * @param value the value of a character that is in a text node ! * @return true if the character has a mapping to a String, ! * such as when writing out entity references. * @xsl.usage internal */ ! final boolean shouldMapTextChar(int value) { // for performance try the values in the boolean array first, // this is faster access than the BitSet for common ASCII values if (value < ASCII_MAX) ! return shouldMapTextChar_ASCII[value]; // rather than java.util.BitSet, our private // implementation is faster (and less general). return get(value); } private static CharInfo getCharInfoBasedOnPrivilege( final String entitiesFileName, final String method, final boolean internal){ return (CharInfo) AccessController.doPrivileged( new PrivilegedAction() { --- 398,472 ---- return (String) m_charToString.get(charKey); } /** * Tell if the character argument that is from ! * an attribute value should have special treatment. 
* * @param value the value of a character that is in an attribute value * @return true if the character should have any special treatment, * such as when writing out attribute values, ! * or entity references. * @xsl.usage internal */ ! final boolean isSpecialAttrChar(int value) { // for performance try the values in the boolean array first, // this is faster access than the BitSet for common ASCII values if (value < ASCII_MAX) ! return isSpecialAttrASCII[value]; // rather than java.util.BitSet, our private // implementation is faster (and less general). return get(value); } /** * Tell if the character argument that is from a ! * text node should have special treatment. * * @param value the value of a character that is in a text node ! * @return true if the character should have any special treatment, ! * such as when writing out attribute values, ! * or entity references. * @xsl.usage internal */ ! final boolean isSpecialTextChar(int value) { // for performance try the values in the boolean array first, // this is faster access than the BitSet for common ASCII values if (value < ASCII_MAX) ! return isSpecialTextASCII[value]; // rather than java.util.BitSet, our private // implementation is faster (and less general). return get(value); } + /** + * This method is used to determine if an ASCII character in + * a text node (not an attribute value) is "clean". + * @param value the character to check (0 to 127). 
+ * @return true if the character can go to the writer as-is + * @xsl.usage internal + */ + final boolean isTextASCIIClean(int value) + { + return isCleanTextASCII[value]; + } + + // In the future one might want to use the array directly and avoid + // the method call, but I think the JIT already inlines this well enough + // so don't do it (for now) - bjm + // public final boolean[] getASCIIClean() + // { + // return isCleanTextASCII; + // } + + private static CharInfo getCharInfoBasedOnPrivilege( final String entitiesFileName, final String method, final boolean internal){ return (CharInfo) AccessController.doPrivileged( new PrivilegedAction() {
*** 489,509 **** */ static CharInfo getCharInfo(String entitiesFileName, String method) { CharInfo charInfo = (CharInfo) m_getCharInfoCache.get(entitiesFileName); if (charInfo != null) { ! return mutableCopyOf(charInfo); } // try to load it internally - cache try { charInfo = getCharInfoBasedOnPrivilege(entitiesFileName, method, true); - // Put the common copy of charInfo in the cache, but return - // a copy of it. m_getCharInfoCache.put(entitiesFileName, charInfo); ! return mutableCopyOf(charInfo); } catch (Exception e) {} // try to load it externally - do not cache try { return getCharInfoBasedOnPrivilege(entitiesFileName, --- 497,515 ---- */ static CharInfo getCharInfo(String entitiesFileName, String method) { CharInfo charInfo = (CharInfo) m_getCharInfoCache.get(entitiesFileName); if (charInfo != null) { ! return charInfo; } // try to load it internally - cache try { charInfo = getCharInfoBasedOnPrivilege(entitiesFileName, method, true); m_getCharInfoCache.put(entitiesFileName, charInfo); ! return charInfo; } catch (Exception e) {} // try to load it externally - do not cache try { return getCharInfoBasedOnPrivilege(entitiesFileName,
*** 526,570 **** return getCharInfoBasedOnPrivilege(entitiesFileName, method, false); } ! /** ! * Create a mutable copy of the cached one. ! * @param charInfo The cached one. ! * @return ! */ ! private static CharInfo mutableCopyOf(CharInfo charInfo) { ! CharInfo copy = new CharInfo(); ! ! int max = charInfo.array_of_bits.length; ! System.arraycopy(charInfo.array_of_bits,0,copy.array_of_bits,0,max); ! ! copy.firstWordNotUsed = charInfo.firstWordNotUsed; ! ! max = charInfo.shouldMapAttrChar_ASCII.length; ! System.arraycopy(charInfo.shouldMapAttrChar_ASCII,0,copy.shouldMapAttrChar_ASCII,0,max); ! ! max = charInfo.shouldMapTextChar_ASCII.length; ! System.arraycopy(charInfo.shouldMapTextChar_ASCII,0,copy.shouldMapTextChar_ASCII,0,max); ! ! // utility field copy.m_charKey is already created in the default constructor ! ! copy.m_charToString = (HashMap) charInfo.m_charToString.clone(); ! ! copy.onlyQuotAmpLtGt = charInfo.onlyQuotAmpLtGt; ! ! return copy; ! } ! ! /** ! * Table of user-specified char infos. ! * The table maps entify file names (the name of the ! * property file without the .properties extension) ! * to CharInfo objects populated with entities defined in ! * corresponding property file. ! */ private static HashMap m_getCharInfoCache = new HashMap(); /** * Returns the array element holding the bit value for the * given integer --- 532,542 ---- return getCharInfoBasedOnPrivilege(entitiesFileName, method, false); } ! /** Table of user-specified char infos. */ private static HashMap m_getCharInfoCache = new HashMap(); /** * Returns the array element holding the bit value for the * given integer
*** 602,613 **** * @param i the integer to add to the set, valid values are * 0, 1, 2 ... up to the maximum that was specified at * the creation of the set. */ private final void set(int i) { ! setASCIItextDirty(i); ! setASCIIattrDirty(i); int j = (i >> SHIFT_PER_WORD); // this word is used int k = j + 1; if(firstWordNotUsed < k) // for optimization purposes. --- 574,584 ---- * @param i the integer to add to the set, valid values are * 0, 1, 2 ... up to the maximum that was specified at * the creation of the set. */ private final void set(int i) { ! setASCIIdirty(i); int j = (i >> SHIFT_PER_WORD); // this word is used int k = j + 1; if(firstWordNotUsed < k) // for optimization purposes.
*** 638,748 **** (1 << (i & LOW_ORDER_BITMASK)) ) != 0; // 0L for 64 bit words return in_the_set; } /** ! * This method returns true if there are some non-standard mappings to ! * entities other than quot, amp, lt, gt, and its only purpose is for ! * performance. ! * @param charToMap The value of the character that is mapped to a String ! * @param outputString The String to which the character is mapped, usually ! * an entity reference such as "&lt;". ! * @return true if the mapping is not one of: ! * <ul> ! * <li> '<' to "&lt;" ! * <li> '>' to "&gt;" ! * <li> '&' to "&amp;" ! * <li> '"' to "&quot;" ! * </ul> */ ! private boolean extraEntity(String outputString, int charToMap) { boolean extra = false; ! if (charToMap < ASCII_MAX) { ! switch (charToMap) { ! case '"' : // quot ! if (!outputString.equals("&quot;")) ! extra = true; ! break; ! case '&' : // amp ! if (!outputString.equals("&amp;")) ! extra = true; ! break; ! case '<' : // lt ! if (!outputString.equals("&lt;")) ! extra = true; ! break; ! case '>' : // gt ! if (!outputString.equals("&gt;")) ! extra = true; break; default : // other entity in range 0 to 127 extra = true; } } return extra; } /** ! * If the character is in the ASCII range then ! * mark it as needing replacement with ! * a String on output if it occurs in a text node. * @param ch */ ! private void setASCIItextDirty(int j) { if (0 <= j && j < ASCII_MAX) { ! shouldMapTextChar_ASCII[j] = true; } } /** ! * If the character is in the ASCII range then ! * mark it as needing replacement with ! * a String on output if it occurs in a attribute value. * @param ch */ ! private void setASCIIattrDirty(int j) { if (0 <= j && j < ASCII_MAX) { ! shouldMapAttrChar_ASCII[j] = true; } } ! /** ! * Call this method to register a char to String mapping, for example ! * to map '<' to "&lt;". ! * @param outputString The String to map to. ! * @param inputChar The char to map from. ! * @return true if the mapping is not one of: ! * <ul> ! * <li> '<' to "&lt;" ! 
* <li> '>' to "&gt;" ! * <li> '&' to "&amp;" ! * <li> '"' to "&quot;" ! * </ul> ! */ ! boolean defineChar2StringMapping(String outputString, char inputChar) { CharKey character = new CharKey(inputChar); m_charToString.put(character, outputString); ! set(inputChar); // mark the character has having a mapping to a String ! ! boolean extraMapping = extraEntity(outputString, inputChar); ! return extraMapping; ! } /** * Simple class for fast lookup of char values, when used with * hashtables. You can set the char, then use it as a key. * * @xsl.usage internal */ private static class CharKey extends Object { --- 609,688 ---- (1 << (i & LOW_ORDER_BITMASK)) ) != 0; // 0L for 64 bit words return in_the_set; } + // record if there are any entities other than + // quot, amp, lt, gt (probably user defined) /** ! * @return true if the entity ! * @param code The value of the character that has an entity defined ! * for it. */ ! private boolean extraEntity(int entityValue) { boolean extra = false; ! if (entityValue < 128) { ! switch (entityValue) { ! case 34 : // quot ! case 38 : // amp ! case 60 : // lt ! case 62 : // gt break; default : // other entity in range 0 to 127 extra = true; } } return extra; } /** ! * If the character is a printable ASCII character then ! * mark it as not clean and needing replacement with ! * a String on output. * @param ch */ ! private void setASCIIdirty(int j) { if (0 <= j && j < ASCII_MAX) { ! isCleanTextASCII[j] = false; ! isSpecialTextASCII[j] = true; } } /** ! * If the character is a printable ASCII character then ! * mark it as and not needing replacement with ! * a String on output. * @param ch */ ! private void setASCIIclean(int j) { if (0 <= j && j < ASCII_MAX) { ! isCleanTextASCII[j] = true; ! isSpecialTextASCII[j] = false; } } ! private void defineChar2StringMapping(String outputString, char inputChar) { CharKey character = new CharKey(inputChar); m_charToString.put(character, outputString); ! 
set(inputChar); } /** * Simple class for fast lookup of char values, when used with * hashtables. You can set the char, then use it as a key. * + * This class is a copy of the one in com.sun.org.apache.xml.internal.utils. + * It exists to cut the serializer's dependency on that package. + * * @xsl.usage internal */ private static class CharKey extends Object {