/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
  25 
package javax.xml.bind;
  27 
  28 /**
  29  * Processes white space normalization.
  30  *
  31  * @since 1.0
  32  */
  33 abstract class WhiteSpaceProcessor {
  34 
  35 // benchmarking (see test/src/ReplaceTest.java in the CVS Attic)
  36 // showed that this code is slower than the current code.
  37 //
  38 //    public static String replace(String text) {
  39 //        final int len = text.length();
  40 //        StringBuffer result = new StringBuffer(len);
  41 //
  42 //        for (int i = 0; i < len; i++) {
  43 //            char ch = text.charAt(i);
  44 //            if (isWhiteSpace(ch))
  45 //                result.append(' ');
  46 //            else
  47 //                result.append(ch);
  48 //        }
  49 //
  50 //        return result.toString();
  51 //    }
  52 
  53     public static String replace(String text) {
  54         return replace( (CharSequence)text ).toString();
  55     }
  56 
  57     /**
  58      * @since 2.0
  59      */
  60     public static CharSequence replace(CharSequence text) {
  61         int i=text.length()-1;
  62 
  63         // look for the first whitespace char.
  64         while( i>=0 && !isWhiteSpaceExceptSpace(text.charAt(i)) )
  65             i--;
  66 
  67         if( i<0 )
  68             // no such whitespace. replace(text)==text.
  69             return text;
  70 
  71         // we now know that we need to modify the text.
  72         // allocate a char array to do it.
  73         StringBuilder buf = new StringBuilder(text);
  74 
  75         buf.setCharAt(i--,' ');
  76         for( ; i>=0; i-- )
  77             if( isWhiteSpaceExceptSpace(buf.charAt(i)))
  78                 buf.setCharAt(i,' ');
  79 
  80         return new String(buf);
  81     }
  82 
  83     /**
  84      * Equivalent of {@link String#trim()}.
  85      * @since 2.0
  86      */
  87     public static CharSequence trim(CharSequence text) {
  88         int len = text.length();
  89         int start = 0;
  90 
  91         while( start<len && isWhiteSpace(text.charAt(start)) )
  92             start++;
  93 
  94         int end = len-1;
  95 
  96         while( end>start && isWhiteSpace(text.charAt(end)) )
  97             end--;
  98 
  99         if(start==0 && end==len-1)
 100             return text;    // no change
 101         else
 102             return text.subSequence(start,end+1);
 103     }
 104 
 105     public static String collapse(String text) {
 106         return collapse( (CharSequence)text ).toString();
 107     }
 108 
 109     /**
 110      * This is usually the biggest processing bottleneck.
 111      *
 112      * @since 2.0
 113      */
 114     public static CharSequence collapse(CharSequence text) {
 115         int len = text.length();
 116 
 117         // most of the texts are already in the collapsed form.
 118         // so look for the first whitespace in the hope that we will
 119         // never see it.
 120         int s=0;
 121         while(s<len) {
 122             if(isWhiteSpace(text.charAt(s)))
 123                 break;
 124             s++;
 125         }
 126         if(s==len)
 127             // the input happens to be already collapsed.
 128             return text;
 129 
 130         // we now know that the input contains spaces.
 131         // let's sit down and do the collapsing normally.
 132 
 133         StringBuilder result = new StringBuilder(len /*allocate enough size to avoid re-allocation*/ );
 134 
 135         if(s!=0) {
 136             for( int i=0; i<s; i++ )
 137                 result.append(text.charAt(i));
 138             result.append(' ');
 139         }
 140 
 141         boolean inStripMode = true;
 142         for (int i = s+1; i < len; i++) {
 143             char ch = text.charAt(i);
 144             boolean b = isWhiteSpace(ch);
 145             if (inStripMode && b)
 146                 continue; // skip this character
 147 
 148             inStripMode = b;
 149             if (inStripMode)
 150                 result.append(' ');
 151             else
 152                 result.append(ch);
 153         }
 154 
 155         // remove trailing whitespaces
 156         len = result.length();
 157         if (len > 0 && result.charAt(len - 1) == ' ')
 158             result.setLength(len - 1);
 159         // whitespaces are already collapsed,
 160         // so all we have to do is to remove the last one character
 161         // if it's a whitespace.
 162 
 163         return result;
 164     }
 165 
 166     /**
 167      * Returns true if the specified string is all whitespace.
 168      */
 169     public static final boolean isWhiteSpace(CharSequence s) {
 170         for( int i=s.length()-1; i>=0; i-- )
 171             if(!isWhiteSpace(s.charAt(i)))
 172                 return false;
 173         return true;
 174     }
 175 
 176     /** returns true if the specified char is a white space character. */
 177     public static final boolean isWhiteSpace(char ch) {
 178         // most of the characters are non-control characters.
 179         // so check that first to quickly return false for most of the cases.
 180         if( ch>0x20 )   return false;
 181 
 182         // other than we have to do four comparisons.
 183         return ch == 0x9 || ch == 0xA || ch == 0xD || ch == 0x20;
 184     }
 185 
 186     /**
 187      * Returns true if the specified char is a white space character
 188      * but not 0x20.
 189      */
 190     protected static final boolean isWhiteSpaceExceptSpace(char ch) {
 191         // most of the characters are non-control characters.
 192         // so check that first to quickly return false for most of the cases.
 193         if( ch>=0x20 )   return false;
 194 
 195         // other than we have to do four comparisons.
 196         return ch == 0x9 || ch == 0xA || ch == 0xD;
 197     }
 198 }