1 /*
   2  * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package com.sun.xml.internal.bind.v2.runtime.output;
  27 
  28 import java.io.IOException;
  29 
  30 /**
  31  * Buffer for UTF-8 encoded string.
  32  *
  33  * See http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 for the UTF-8 encoding.
  34  *
  35  * @author Kohsuke Kawaguchi
  36  */
  37 public final class Encoded {
  38     public byte[] buf;
  39 
  40     public int len;
  41 
  42     public Encoded() {}
  43 
  44     public Encoded(String text) {
  45         set(text);
  46     }
  47 
  48     public void ensureSize(int size) {
  49         if(buf==null || buf.length<size)
  50             buf = new byte[size];
  51     }
  52 
  53     public final void set( String text ) {
  54         int length = text.length();
  55 
  56         ensureSize(length*3+1); // +1 for append
  57 
  58         int ptr = 0;
  59 
  60         for (int i = 0; i < length; i++) {
  61             final char chr = text.charAt(i);
  62             if (chr > 0x7F) {
  63                 if (chr > 0x7FF) {
  64                     if(Character.MIN_HIGH_SURROGATE<=chr && chr<=Character.MAX_LOW_SURROGATE) {
  65                         // surrogate
  66                         int uc = (((chr & 0x3ff) << 10) | (text.charAt(++i) & 0x3ff)) + 0x10000;
  67 
  68                         buf[ptr++] = (byte)(0xF0 | ((uc >> 18)));
  69                         buf[ptr++] = (byte)(0x80 | ((uc >> 12) & 0x3F));
  70                         buf[ptr++] = (byte)(0x80 | ((uc >> 6) & 0x3F));
  71                         buf[ptr++] = (byte)(0x80 + (uc & 0x3F));
  72                         continue;
  73                     }
  74                     buf[ptr++] = (byte)(0xE0 + (chr >> 12));
  75                     buf[ptr++] = (byte)(0x80 + ((chr >> 6) & 0x3F));
  76                 } else {
  77                     buf[ptr++] = (byte)(0xC0 + (chr >> 6));
  78                 }
  79                 buf[ptr++] = (byte)(0x80 + (chr & 0x3F));
  80             } else {
  81                 buf[ptr++] = (byte)chr;
  82             }
  83         }
  84 
  85         len = ptr;
  86     }
  87 
  88     /**
  89      * Fill in the buffer by encoding the specified characters
  90      * while escaping characters like &lt;
  91      *
  92      * @param isAttribute
  93      *      if true, characters like \t, \r, and \n are also escaped.
  94      */
  95     public final void setEscape(String text, boolean isAttribute) {
  96         int length = text.length();
  97         ensureSize(length*6+1);     // in the worst case the text is like """""", so we need 6 bytes per char
  98 
  99         int ptr = 0;
 100 
 101         for (int i = 0; i < length; i++) {
 102             final char chr = text.charAt(i);
 103 
 104             int ptr1 = ptr;
 105             if (chr > 0x7F) {
 106                 if (chr > 0x7FF) {
 107                     if(Character.MIN_HIGH_SURROGATE<=chr && chr<=Character.MAX_LOW_SURROGATE) {
 108                         // surrogate
 109                         int uc = (((chr & 0x3ff) << 10) | (text.charAt(++i) & 0x3ff)) + 0x10000;
 110 
 111                         buf[ptr++] = (byte)(0xF0 | ((uc >> 18)));
 112                         buf[ptr++] = (byte)(0x80 | ((uc >> 12) & 0x3F));
 113                         buf[ptr++] = (byte)(0x80 | ((uc >> 6) & 0x3F));
 114                         buf[ptr++] = (byte)(0x80 + (uc & 0x3F));
 115                         continue;
 116                     }
 117                     buf[ptr1++] = (byte)(0xE0 + (chr >> 12));
 118                     buf[ptr1++] = (byte)(0x80 + ((chr >> 6) & 0x3F));
 119                 } else {
 120                     buf[ptr1++] = (byte)(0xC0 + (chr >> 6));
 121                 }
 122                 buf[ptr1++] = (byte)(0x80 + (chr & 0x3F));
 123             } else {
 124                 byte[] ent;
 125 
 126                 if((ent=attributeEntities[chr])!=null) {
 127                     // the majority of the case is just printed as a char,
 128                     // so it's very important to reject them as quickly as possible
 129 
 130                     // check again to see if this really needs to be escaped
 131                     if(isAttribute || entities[chr]!=null)
 132                         ptr1 = writeEntity(ent,ptr1);
 133                     else
 134                         buf[ptr1++] = (byte)chr;
 135                 } else
 136                     buf[ptr1++] = (byte)chr;
 137             }
 138             ptr = ptr1;
 139         }
 140         len = ptr;
 141     }
 142 
 143     private int writeEntity( byte[] entity, int ptr ) {
 144         System.arraycopy(entity,0,buf,ptr,entity.length);
 145         return ptr+entity.length;
 146     }
 147 
 148     /**
 149      * Writes the encoded bytes to the given output stream.
 150      */
 151     public final void write(UTF8XmlOutput out) throws IOException {
 152         out.write(buf,0,len);
 153     }
 154 
 155     /**
 156      * Appends a new character to the end of the buffer.
 157      * This assumes that you have enough space in the buffer.
 158      */
 159     public void append(char b) {
 160         buf[len++] = (byte)b;
 161     }
 162 
 163     /**
 164      * Reallocate the buffer to the exact size of the data
 165      * to reduce the memory footprint.
 166      */
 167     public void compact() {
 168         byte[] b = new byte[len];
 169         System.arraycopy(buf,0,b,0,len);
 170         buf = b;
 171     }
 172 
 173     /**
 174      * UTF-8 encoded entities keyed by their character code.
 175      * e.g., entities['&'] == AMP_ENTITY.
 176      *
 177      * In attributes we need to encode more characters.
 178      */
 179     private static final byte[][] entities = new byte[0x80][];
 180     private static final byte[][] attributeEntities = new byte[0x80][];
 181 
 182     static {
 183         add('&',"&amp;",false);
 184         add('<',"&lt;",false);
 185         add('>',"&gt;",false);
 186         add('"',"&quot;",true);
 187         add('\t',"&#x9;",true);
 188         add('\r',"&#xD;",false);
 189         add('\n',"&#xA;",true);
 190     }
 191 
 192     private static void add(char c, String s, boolean attOnly) {
 193         byte[] image = UTF8XmlOutput.toBytes(s);
 194         attributeEntities[c] = image;
 195         if(!attOnly)
 196             entities[c] = image;
 197     }
 198 }