1 /* 2 * Copyright (c) 1996, 2001, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 /* 27 * (C) Copyright Taligent, Inc. 1996,1997 - All Rights Reserved 28 * (C) Copyright IBM Corp. 1996, 1997 - All Rights Reserved 29 * 30 * The original version of this source code and documentation is copyrighted 31 * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These 32 * materials are provided under terms of a License Agreement between Taligent 33 * and Sun. This technology is protected by multiple US and International 34 * patents. This notice and attribution to Taligent may not be removed. 35 * Taligent is a registered trademark of Taligent, Inc. 36 * 37 */ 38 39 package java.text; 40 /** 41 * CollationRules contains the default en_US collation rules as a base 42 * for building other collation tables. 43 * <p>Note that decompositions are done before these rules are used, 44 * so they do not have to contain accented characters, such as A-grave. 45 * @see RuleBasedCollator 46 * @see LocaleElements 47 * @author Helena Shih, Mark Davis 48 */ 49 final class CollationRules { 50 final static String DEFAULTRULES = new String( 51 "" // no FRENCH accent order by default, add in French Delta 52 // IGNORABLES (up to first < character) 53 // COMPLETELY IGNORE format characters 54 + "='\u200B'=\u200C=\u200D=\u200E=\u200F" 55 // Control Characters 56 + "=\u0000 =\u0001 =\u0002 =\u0003 =\u0004" //null, .. eot 57 + "=\u0005 =\u0006 =\u0007 =\u0008 ='\u0009'" //enq, ... 58 + "='\u000b' =\u000e" //vt,, so 59 + "=\u000f ='\u0010' =\u0011 =\u0012 =\u0013" //si, dle, dc1, dc2, dc3 60 + "=\u0014 =\u0015 =\u0016 =\u0017 =\u0018" //dc4, nak, syn, etb, can 61 + "=\u0019 =\u001a =\u001b =\u001c =\u001d" //em, sub, esc, fs, gs 62 + "=\u001e =\u001f =\u007f" //rs, us, del 63 //....then the C1 Latin 1 reserved control codes 64 + "=\u0080 =\u0081 =\u0082 =\u0083 =\u0084 =\u0085" 65 + "=\u0086 =\u0087 =\u0088 =\u0089 =\u008a =\u008b" 66 + "=\u008c =\u008d =\u008e =\u008f =\u0090 =\u0091" 67 + "=\u0092 =\u0093 =\u0094 =\u0095 =\u0096 =\u0097" 68 + "=\u0098 =\u0099 =\u009a =\u009b =\u009c =\u009d" 69 + "=\u009e =\u009f" 70 // IGNORE except for secondary, tertiary difference 71 // Spaces 72 + ";'\u0020';'\u00A0'" // spaces 73 + ";'\u2000';'\u2001';'\u2002';'\u2003';'\u2004'" // spaces 74 + ";'\u2005';'\u2006';'\u2007';'\u2008';'\u2009'" // spaces 75 + ";'\u200A';'\u3000';'\uFEFF'" // spaces 76 + ";'\r' ;'\t' ;'\n';'\f';'\u000b'" // whitespace 77 78 // Non-spacing accents 79 80 + ";\u0301" // non-spacing acute accent 81 + ";\u0300" // non-spacing grave accent 82 + ";\u0306" // non-spacing breve accent 83 + ";\u0302" // non-spacing circumflex accent 84 + ";\u030c" // non-spacing caron/hacek accent 85 + ";\u030a" // non-spacing ring above accent 86 + ";\u030d" // non-spacing vertical line above 87 + ";\u0308" // non-spacing diaeresis accent 88 + ";\u030b" // non-spacing double acute accent 89 + ";\u0303" // non-spacing tilde accent 90 + ";\u0307" // non-spacing dot above/overdot accent 91 + ";\u0304" // non-spacing macron accent 92 + ";\u0337" // non-spacing short slash overlay (overstruck diacritic) 93 + ";\u0327" // non-spacing cedilla accent 94 + ";\u0328" // non-spacing ogonek accent 95 + ";\u0323" // non-spacing dot-below/underdot accent 96 + ";\u0332" // non-spacing underscore/underline accent 97 // with the rest of the general diacritical marks in binary order 98 + ";\u0305" // non-spacing overscore/overline 99 + ";\u0309" // non-spacing hook above 100 + ";\u030e" // non-spacing double vertical line above 101 + ";\u030f" // non-spacing double grave 102 + ";\u0310" // non-spacing chandrabindu 103 + ";\u0311" // non-spacing inverted breve 104 + ";\u0312" // non-spacing turned comma above/cedilla above 105 + ";\u0313" // non-spacing comma above 106 + ";\u0314" // non-spacing reversed comma above 107 + ";\u0315" // non-spacing comma above right 108 + ";\u0316" // non-spacing grave below 109 + ";\u0317" // non-spacing acute below 110 + ";\u0318" // non-spacing left tack below 111 + ";\u0319" // non-spacing tack below 112 + ";\u031a" // non-spacing left angle above 113 + ";\u031b" // non-spacing horn 114 + ";\u031c" // non-spacing left half ring below 115 + ";\u031d" // non-spacing up tack below 116 + ";\u031e" // non-spacing down tack below 117 + ";\u031f" // non-spacing plus sign below 118 + ";\u0320" // non-spacing minus sign below 119 + ";\u0321" // non-spacing palatalized hook below 120 + ";\u0322" // non-spacing retroflex hook below 121 + ";\u0324" // non-spacing double dot below 122 + ";\u0325" // non-spacing ring below 123 + ";\u0326" // non-spacing comma below 124 + ";\u0329" // non-spacing vertical line below 125 + ";\u032a" // non-spacing bridge below 126 + ";\u032b" // non-spacing inverted double arch below 127 + ";\u032c" // non-spacing hacek below 128 + ";\u032d" // non-spacing circumflex below 129 + ";\u032e" // non-spacing breve below 130 + ";\u032f" // non-spacing inverted breve below 131 + ";\u0330" // non-spacing tilde below 132 + ";\u0331" // non-spacing macron below 133 + ";\u0333" // non-spacing double underscore 134 + ";\u0334" // non-spacing tilde overlay 135 + ";\u0335" // non-spacing short bar overlay 136 + ";\u0336" // non-spacing long bar overlay 137 + ";\u0338" // non-spacing long slash overlay 138 + ";\u0339" // non-spacing right half ring below 139 + ";\u033a" // non-spacing inverted bridge below 140 + ";\u033b" // non-spacing square below 141 + ";\u033c" // non-spacing seagull below 142 + ";\u033d" // non-spacing x above 143 + ";\u033e" // non-spacing vertical tilde 144 + ";\u033f" // non-spacing double overscore 145 //+ ";\u0340" // non-spacing grave tone mark == \u0300 146 //+ ";\u0341" // non-spacing acute tone mark == \u0301 147 + ";\u0342;" 148 //+ "\u0343;" // == \u0313 149 + "\u0344;\u0345;\u0360;\u0361" // newer 150 + ";\u0483;\u0484;\u0485;\u0486" // Cyrillic accents 151 152 + ";\u20D0;\u20D1;\u20D2" // symbol accents 153 + ";\u20D3;\u20D4;\u20D5" // symbol accents 154 + ";\u20D6;\u20D7;\u20D8" // symbol accents 155 + ";\u20D9;\u20DA;\u20DB" // symbol accents 156 + ";\u20DC;\u20DD;\u20DE" // symbol accents 157 + ";\u20DF;\u20E0;\u20E1" // symbol accents 158 159 + ",'\u002D';\u00AD" // dashes 160 + ";\u2010;\u2011;\u2012" // dashes 161 + ";\u2013;\u2014;\u2015" // dashes 162 + ";\u2212" // dashes 163 164 // other punctuation 165 166 + "<'\u005f'" // underline/underscore (spacing) 167 + "<\u00af" // overline or macron (spacing) 168 + "<'\u002c'" // comma (spacing) 169 + "<'\u003b'" // semicolon 170 + "<'\u003a'" // colon 171 + "<'\u0021'" // exclamation point 172 + "<\u00a1" // inverted exclamation point 173 + "<'\u003f'" // question mark 174 + "<\u00bf" // inverted question mark 175 + "<'\u002f'" // slash 176 + "<'\u002e'" // period/full stop 177 + "<\u00b4" // acute accent (spacing) 178 + "<'\u0060'" // grave accent (spacing) 179 + "<'\u005e'" // circumflex accent (spacing) 180 + "<\u00a8" // diaresis/umlaut accent (spacing) 181 + "<'\u007e'" // tilde accent (spacing) 182 + "<\u00b7" // middle dot (spacing) 183 + "<\u00b8" // cedilla accent (spacing) 184 + "<'\u0027'" // apostrophe 185 + "<'\"'" // quotation marks 186 + "<\u00ab" // left angle quotes 187 + "<\u00bb" // right angle quotes 188 + "<'\u0028'" // left parenthesis 189 + "<'\u0029'" // right parenthesis 190 + "<'\u005b'" // left bracket 191 + "<'\u005d'" // right bracket 192 + "<'\u007b'" // left brace 193 + "<'\u007d'" // right brace 194 + "<\u00a7" // section symbol 195 + "<\u00b6" // paragraph symbol 196 + "<\u00a9" // copyright symbol 197 + "<\u00ae" // registered trademark symbol 198 + "<'\u0040'" // at sign 199 + "<\u00a4" // international currency symbol 200 + "<\u0e3f" // baht sign 201 + "<\u00a2" // cent sign 202 + "<\u20a1" // colon sign 203 + "<\u20a2" // cruzeiro sign 204 + "<'\u0024'" // dollar sign 205 + "<\u20ab" // dong sign 206 + "<\u20ac" // euro sign 207 + "<\u20a3" // franc sign 208 + "<\u20a4" // lira sign 209 + "<\u20a5" // mill sign 210 + "<\u20a6" // naira sign 211 + "<\u20a7" // peseta sign 212 + "<\u00a3" // pound-sterling sign 213 + "<\u20a8" // rupee sign 214 + "<\u20aa" // new shekel sign 215 + "<\u20a9" // won sign 216 + "<\u00a5" // yen sign 217 + "<'\u002a'" // asterisk 218 + "<'\\'" // backslash 219 + "<'\u0026'" // ampersand 220 + "<'\u0023'" // number sign 221 + "<'\u0025'" // percent sign 222 + "<'\u002b'" // plus sign 223 + "<\u00b1" // plus-or-minus sign 224 + "<\u00f7" // divide sign 225 + "<\u00d7" // multiply sign 226 + "<'\u003c'" // less-than sign 227 + "<'\u003d'" // equal sign 228 + "<'\u003e'" // greater-than sign 229 + "<\u00ac" // end of line symbol/logical NOT symbol 230 + "<'\u007c'" // vertical line/logical OR symbol 231 + "<\u00a6" // broken vertical line 232 + "<\u00b0" // degree symbol 233 + "<\u00b5" // micro symbol 234 235 // NUMERICS 236 237 + "<0<1<2<3<4<5<6<7<8<9" 238 + "<\u00bc<\u00bd<\u00be" // 1/4,1/2,3/4 fractions 239 240 // NON-IGNORABLES 241 + "<a,A" 242 + "<b,B" 243 + "<c,C" 244 + "<d,D" 245 + "<\u00F0,\u00D0" // eth 246 + "<e,E" 247 + "<f,F" 248 + "<g,G" 249 + "<h,H" 250 + "<i,I" 251 + "<j,J" 252 + "<k,K" 253 + "<l,L" 254 + "<m,M" 255 + "<n,N" 256 + "<o,O" 257 + "<p,P" 258 + "<q,Q" 259 + "<r,R" 260 + "<s, S & SS,\u00DF" // s-zet 261 + "<t,T" 262 + "& TH, \u00DE &TH, \u00FE " // thorn 263 + "<u,U" 264 + "<v,V" 265 + "<w,W" 266 + "<x,X" 267 + "<y,Y" 268 + "<z,Z" 269 + "&AE,\u00C6" // ae & AE ligature 270 + "&AE,\u00E6" 271 + "&OE,\u0152" // oe & OE ligature 272 + "&OE,\u0153" 273 ); 274 }