1 /* 2 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package javax.xml.bind; 27 28 /** 29 * Processes white space normalization. 30 * 31 * @since 1.0 32 */ 33 abstract class WhiteSpaceProcessor { 34 35 // benchmarking (see test/src/ReplaceTest.java in the CVS Attic) 36 // showed that this code is slower than the current code. 37 // 38 // public static String replace(String text) { 39 // final int len = text.length(); 40 // StringBuffer result = new StringBuffer(len); 41 // 42 // for (int i = 0; i < len; i++) { 43 // char ch = text.charAt(i); 44 // if (isWhiteSpace(ch)) 45 // result.append(' '); 46 // else 47 // result.append(ch); 48 // } 49 // 50 // return result.toString(); 51 // } 52 53 public static String replace(String text) { 54 return replace( (CharSequence)text ).toString(); 55 } 56 57 /** 58 * @since 2.0 59 */ 60 public static CharSequence replace(CharSequence text) { 61 int i=text.length()-1; 62 63 // look for the first whitespace char. 64 while( i>=0 && !isWhiteSpaceExceptSpace(text.charAt(i)) ) 65 i--; 66 67 if( i<0 ) 68 // no such whitespace. replace(text)==text. 69 return text; 70 71 // we now know that we need to modify the text. 72 // allocate a char array to do it. 73 StringBuilder buf = new StringBuilder(text); 74 75 buf.setCharAt(i--,' '); 76 for( ; i>=0; i-- ) 77 if( isWhiteSpaceExceptSpace(buf.charAt(i))) 78 buf.setCharAt(i,' '); 79 80 return new String(buf); 81 } 82 83 /** 84 * Equivalent of {@link String#trim()}. 85 * @since 2.0 86 */ 87 public static CharSequence trim(CharSequence text) { 88 int len = text.length(); 89 int start = 0; 90 91 while( start<len && isWhiteSpace(text.charAt(start)) ) 92 start++; 93 94 int end = len-1; 95 96 while( end>start && isWhiteSpace(text.charAt(end)) ) 97 end--; 98 99 if(start==0 && end==len-1) 100 return text; // no change 101 else 102 return text.subSequence(start,end+1); 103 } 104 105 public static String collapse(String text) { 106 return collapse( (CharSequence)text ).toString(); 107 } 108 109 /** 110 * This is usually the biggest processing bottleneck. 111 * 112 * @since 2.0 113 */ 114 public static CharSequence collapse(CharSequence text) { 115 int len = text.length(); 116 117 // most of the texts are already in the collapsed form. 118 // so look for the first whitespace in the hope that we will 119 // never see it. 120 int s=0; 121 while(s<len) { 122 if(isWhiteSpace(text.charAt(s))) 123 break; 124 s++; 125 } 126 if(s==len) 127 // the input happens to be already collapsed. 128 return text; 129 130 // we now know that the input contains spaces. 131 // let's sit down and do the collapsing normally. 132 133 StringBuilder result = new StringBuilder(len /*allocate enough size to avoid re-allocation*/ ); 134 135 if(s!=0) { 136 for( int i=0; i<s; i++ ) 137 result.append(text.charAt(i)); 138 result.append(' '); 139 } 140 141 boolean inStripMode = true; 142 for (int i = s+1; i < len; i++) { 143 char ch = text.charAt(i); 144 boolean b = isWhiteSpace(ch); 145 if (inStripMode && b) 146 continue; // skip this character 147 148 inStripMode = b; 149 if (inStripMode) 150 result.append(' '); 151 else 152 result.append(ch); 153 } 154 155 // remove trailing whitespaces 156 len = result.length(); 157 if (len > 0 && result.charAt(len - 1) == ' ') 158 result.setLength(len - 1); 159 // whitespaces are already collapsed, 160 // so all we have to do is to remove the last one character 161 // if it's a whitespace. 162 163 return result; 164 } 165 166 /** 167 * Returns true if the specified string is all whitespace. 168 */ 169 public static final boolean isWhiteSpace(CharSequence s) { 170 for( int i=s.length()-1; i>=0; i-- ) 171 if(!isWhiteSpace(s.charAt(i))) 172 return false; 173 return true; 174 } 175 176 /** returns true if the specified char is a white space character. */ 177 public static final boolean isWhiteSpace(char ch) { 178 // most of the characters are non-control characters. 179 // so check that first to quickly return false for most of the cases. 180 if( ch>0x20 ) return false; 181 182 // other than we have to do four comparisons. 183 return ch == 0x9 || ch == 0xA || ch == 0xD || ch == 0x20; 184 } 185 186 /** 187 * Returns true if the specified char is a white space character 188 * but not 0x20. 189 */ 190 protected static final boolean isWhiteSpaceExceptSpace(char ch) { 191 // most of the characters are non-control characters. 192 // so check that first to quickly return false for most of the cases. 193 if( ch>=0x20 ) return false; 194 195 // other than we have to do four comparisons. 196 return ch == 0x9 || ch == 0xA || ch == 0xD; 197 } 198 }