1 /* 2 * Copyright (c) 2010, 2011, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 /* 27 ******************************************************************************* 28 * Copyright (C) 2009-2010, International Business Machines Corporation and * 29 * others. All Rights Reserved. * 30 ******************************************************************************* 31 */ 32 package sun.util.locale; 33 34 import java.util.ArrayList; 35 import java.util.HashMap; 36 import java.util.HashSet; 37 import java.util.List; 38 import java.util.Map; 39 import java.util.Set; 40 41 public final class InternalLocaleBuilder { 42 43 private static final CaseInsensitiveChar PRIVATEUSE_KEY 44 = new CaseInsensitiveChar(LanguageTag.PRIVATEUSE); 45 46 private String language = ""; 47 private String script = ""; 48 private String region = ""; 49 private String variant = ""; 50 51 private Map<CaseInsensitiveChar, String> extensions; 52 private Set<CaseInsensitiveString> uattributes; 53 private Map<CaseInsensitiveString, String> ukeywords; 54 55 56 public InternalLocaleBuilder() { 57 } 58 59 public InternalLocaleBuilder setLanguage(String language) throws LocaleSyntaxException { 60 if (LocaleUtils.isEmpty(language)) { 61 this.language = ""; 62 } else { 63 if (!LanguageTag.isLanguage(language)) { 64 throw new LocaleSyntaxException("Ill-formed language: " + language, 0); 65 } 66 this.language = language; 67 } 68 return this; 69 } 70 71 public InternalLocaleBuilder setScript(String script) throws LocaleSyntaxException { 72 if (LocaleUtils.isEmpty(script)) { 73 this.script = ""; 74 } else { 75 if (!LanguageTag.isScript(script)) { 76 throw new LocaleSyntaxException("Ill-formed script: " + script, 0); 77 } 78 this.script = script; 79 } 80 return this; 81 } 82 83 public InternalLocaleBuilder setRegion(String region) throws LocaleSyntaxException { 84 if (LocaleUtils.isEmpty(region)) { 85 this.region = ""; 86 } else { 87 if (!LanguageTag.isRegion(region)) { 88 throw new LocaleSyntaxException("Ill-formed region: " + region, 0); 89 } 90 this.region = region; 91 } 92 return this; 93 } 94 95 public InternalLocaleBuilder setVariant(String variant) throws LocaleSyntaxException { 96 if (LocaleUtils.isEmpty(variant)) { 97 this.variant = ""; 98 } else { 99 // normalize separators to "_" 100 String var = variant.replaceAll(LanguageTag.SEP, BaseLocale.SEP); 101 int errIdx = checkVariants(var, BaseLocale.SEP); 102 if (errIdx != -1) { 103 throw new LocaleSyntaxException("Ill-formed variant: " + variant, errIdx); 104 } 105 this.variant = var; 106 } 107 return this; 108 } 109 110 public InternalLocaleBuilder addUnicodeLocaleAttribute(String attribute) throws LocaleSyntaxException { 111 if (!UnicodeLocaleExtension.isAttribute(attribute)) { 112 throw new LocaleSyntaxException("Ill-formed Unicode locale attribute: " + attribute); 113 } 114 // Use case insensitive string to prevent duplication 115 if (uattributes == null) { 116 uattributes = new HashSet<>(4); 117 } 118 uattributes.add(new CaseInsensitiveString(attribute)); 119 return this; 120 } 121 122 public InternalLocaleBuilder removeUnicodeLocaleAttribute(String attribute) throws LocaleSyntaxException { 123 if (attribute == null || !UnicodeLocaleExtension.isAttribute(attribute)) { 124 throw new LocaleSyntaxException("Ill-formed Unicode locale attribute: " + attribute); 125 } 126 if (uattributes != null) { 127 uattributes.remove(new CaseInsensitiveString(attribute)); 128 } 129 return this; 130 } 131 132 public InternalLocaleBuilder setUnicodeLocaleKeyword(String key, String type) throws LocaleSyntaxException { 133 if (!UnicodeLocaleExtension.isKey(key)) { 134 throw new LocaleSyntaxException("Ill-formed Unicode locale keyword key: " + key); 135 } 136 137 CaseInsensitiveString cikey = new CaseInsensitiveString(key); 138 if (type == null) { 139 if (ukeywords != null) { 140 // null type is used for remove the key 141 ukeywords.remove(cikey); 142 } 143 } else { 144 if (type.length() != 0) { 145 // normalize separator to "-" 146 String tp = type.replaceAll(BaseLocale.SEP, LanguageTag.SEP); 147 // validate 148 StringTokenIterator itr = new StringTokenIterator(tp, LanguageTag.SEP); 149 while (!itr.isDone()) { 150 String s = itr.current(); 151 if (!UnicodeLocaleExtension.isTypeSubtag(s)) { 152 throw new LocaleSyntaxException("Ill-formed Unicode locale keyword type: " 153 + type, 154 itr.currentStart()); 155 } 156 itr.next(); 157 } 158 } 159 if (ukeywords == null) { 160 ukeywords = new HashMap<>(4); 161 } 162 ukeywords.put(cikey, type); 163 } 164 return this; 165 } 166 167 public InternalLocaleBuilder setExtension(char singleton, String value) throws LocaleSyntaxException { 168 // validate key 169 boolean isBcpPrivateuse = LanguageTag.isPrivateusePrefixChar(singleton); 170 if (!isBcpPrivateuse && !LanguageTag.isExtensionSingletonChar(singleton)) { 171 throw new LocaleSyntaxException("Ill-formed extension key: " + singleton); 172 } 173 174 boolean remove = LocaleUtils.isEmpty(value); 175 CaseInsensitiveChar key = new CaseInsensitiveChar(singleton); 176 177 if (remove) { 178 if (UnicodeLocaleExtension.isSingletonChar(key.value())) { 179 // clear entire Unicode locale extension 180 if (uattributes != null) { 181 uattributes.clear(); 182 } 183 if (ukeywords != null) { 184 ukeywords.clear(); 185 } 186 } else { 187 if (extensions != null && extensions.containsKey(key)) { 188 extensions.remove(key); 189 } 190 } 191 } else { 192 // validate value 193 String val = value.replaceAll(BaseLocale.SEP, LanguageTag.SEP); 194 StringTokenIterator itr = new StringTokenIterator(val, LanguageTag.SEP); 195 while (!itr.isDone()) { 196 String s = itr.current(); 197 boolean validSubtag; 198 if (isBcpPrivateuse) { 199 validSubtag = LanguageTag.isPrivateuseSubtag(s); 200 } else { 201 validSubtag = LanguageTag.isExtensionSubtag(s); 202 } 203 if (!validSubtag) { 204 throw new LocaleSyntaxException("Ill-formed extension value: " + s, 205 itr.currentStart()); 206 } 207 itr.next(); 208 } 209 210 if (UnicodeLocaleExtension.isSingletonChar(key.value())) { 211 setUnicodeLocaleExtension(val); 212 } else { 213 if (extensions == null) { 214 extensions = new HashMap<>(4); 215 } 216 extensions.put(key, val); 217 } 218 } 219 return this; 220 } 221 222 /* 223 * Set extension/private subtags in a single string representation 224 */ 225 public InternalLocaleBuilder setExtensions(String subtags) throws LocaleSyntaxException { 226 if (LocaleUtils.isEmpty(subtags)) { 227 clearExtensions(); 228 return this; 229 } 230 subtags = subtags.replaceAll(BaseLocale.SEP, LanguageTag.SEP); 231 StringTokenIterator itr = new StringTokenIterator(subtags, LanguageTag.SEP); 232 233 List<String> extensions = null; 234 String privateuse = null; 235 236 int parsed = 0; 237 int start; 238 239 // Make a list of extension subtags 240 while (!itr.isDone()) { 241 String s = itr.current(); 242 if (LanguageTag.isExtensionSingleton(s)) { 243 start = itr.currentStart(); 244 String singleton = s; 245 StringBuilder sb = new StringBuilder(singleton); 246 247 itr.next(); 248 while (!itr.isDone()) { 249 s = itr.current(); 250 if (LanguageTag.isExtensionSubtag(s)) { 251 sb.append(LanguageTag.SEP).append(s); 252 parsed = itr.currentEnd(); 253 } else { 254 break; 255 } 256 itr.next(); 257 } 258 259 if (parsed < start) { 260 throw new LocaleSyntaxException("Incomplete extension '" + singleton + "'", 261 start); 262 } 263 264 if (extensions == null) { 265 extensions = new ArrayList<>(4); 266 } 267 extensions.add(sb.toString()); 268 } else { 269 break; 270 } 271 } 272 if (!itr.isDone()) { 273 String s = itr.current(); 274 if (LanguageTag.isPrivateusePrefix(s)) { 275 start = itr.currentStart(); 276 StringBuilder sb = new StringBuilder(s); 277 278 itr.next(); 279 while (!itr.isDone()) { 280 s = itr.current(); 281 if (!LanguageTag.isPrivateuseSubtag(s)) { 282 break; 283 } 284 sb.append(LanguageTag.SEP).append(s); 285 parsed = itr.currentEnd(); 286 287 itr.next(); 288 } 289 if (parsed <= start) { 290 throw new LocaleSyntaxException("Incomplete privateuse:" 291 + subtags.substring(start), 292 start); 293 } else { 294 privateuse = sb.toString(); 295 } 296 } 297 } 298 299 if (!itr.isDone()) { 300 throw new LocaleSyntaxException("Ill-formed extension subtags:" 301 + subtags.substring(itr.currentStart()), 302 itr.currentStart()); 303 } 304 305 return setExtensions(extensions, privateuse); 306 } 307 308 /* 309 * Set a list of BCP47 extensions and private use subtags 310 * BCP47 extensions are already validated and well-formed, but may contain duplicates 311 */ 312 private InternalLocaleBuilder setExtensions(List<String> bcpExtensions, String privateuse) { 313 clearExtensions(); 314 315 if (!LocaleUtils.isEmpty(bcpExtensions)) { 316 Set<CaseInsensitiveChar> done = new HashSet<>(bcpExtensions.size()); 317 for (String bcpExt : bcpExtensions) { 318 CaseInsensitiveChar key = new CaseInsensitiveChar(bcpExt); 319 // ignore duplicates 320 if (!done.contains(key)) { 321 // each extension string contains singleton, e.g. "a-abc-def" 322 if (UnicodeLocaleExtension.isSingletonChar(key.value())) { 323 setUnicodeLocaleExtension(bcpExt.substring(2)); 324 } else { 325 if (extensions == null) { 326 extensions = new HashMap<>(4); 327 } 328 extensions.put(key, bcpExt.substring(2)); 329 } 330 } 331 done.add(key); 332 } 333 } 334 if (privateuse != null && privateuse.length() > 0) { 335 // privateuse string contains prefix, e.g. "x-abc-def" 336 if (extensions == null) { 337 extensions = new HashMap<>(1); 338 } 339 extensions.put(new CaseInsensitiveChar(privateuse), privateuse.substring(2)); 340 } 341 342 return this; 343 } 344 345 /* 346 * Reset Builder's internal state with the given language tag 347 */ 348 public InternalLocaleBuilder setLanguageTag(LanguageTag langtag) { 349 clear(); 350 if (!langtag.getExtlangs().isEmpty()) { 351 language = langtag.getExtlangs().get(0); 352 } else { 353 String lang = langtag.getLanguage(); 354 if (!lang.equals(LanguageTag.UNDETERMINED)) { 355 language = lang; 356 } 357 } 358 script = langtag.getScript(); 359 region = langtag.getRegion(); 360 361 List<String> bcpVariants = langtag.getVariants(); 362 if (!bcpVariants.isEmpty()) { 363 StringBuilder var = new StringBuilder(bcpVariants.get(0)); 364 int size = bcpVariants.size(); 365 for (int i = 1; i < size; i++) { 366 var.append(BaseLocale.SEP).append(bcpVariants.get(i)); 367 } 368 variant = var.toString(); 369 } 370 371 setExtensions(langtag.getExtensions(), langtag.getPrivateuse()); 372 373 return this; 374 } 375 376 public InternalLocaleBuilder setLocale(BaseLocale base, LocaleExtensions localeExtensions) throws LocaleSyntaxException { 377 String language = base.getLanguage(); 378 String script = base.getScript(); 379 String region = base.getRegion(); 380 String variant = base.getVariant(); 381 382 // Special backward compatibility support 383 384 // Exception 1 - ja_JP_JP 385 if (language.equals("ja") && region.equals("JP") && variant.equals("JP")) { 386 // When locale ja_JP_JP is created, ca-japanese is always there. 387 // The builder ignores the variant "JP" 388 assert("japanese".equals(localeExtensions.getUnicodeLocaleType("ca"))); 389 variant = ""; 390 } 391 // Exception 2 - th_TH_TH 392 else if (language.equals("th") && region.equals("TH") && variant.equals("TH")) { 393 // When locale th_TH_TH is created, nu-thai is always there. 394 // The builder ignores the variant "TH" 395 assert("thai".equals(localeExtensions.getUnicodeLocaleType("nu"))); 396 variant = ""; 397 } 398 // Exception 3 - no_NO_NY 399 else if (language.equals("no") && region.equals("NO") && variant.equals("NY")) { 400 // no_NO_NY is a valid locale and used by Java 6 or older versions. 401 // The build ignores the variant "NY" and change the language to "nn". 402 language = "nn"; 403 variant = ""; 404 } 405 406 // Validate base locale fields before updating internal state. 407 // LocaleExtensions always store validated/canonicalized values, 408 // so no checks are necessary. 409 if (language.length() > 0 && !LanguageTag.isLanguage(language)) { 410 throw new LocaleSyntaxException("Ill-formed language: " + language); 411 } 412 413 if (script.length() > 0 && !LanguageTag.isScript(script)) { 414 throw new LocaleSyntaxException("Ill-formed script: " + script); 415 } 416 417 if (region.length() > 0 && !LanguageTag.isRegion(region)) { 418 throw new LocaleSyntaxException("Ill-formed region: " + region); 419 } 420 421 if (variant.length() > 0) { 422 int errIdx = checkVariants(variant, BaseLocale.SEP); 423 if (errIdx != -1) { 424 throw new LocaleSyntaxException("Ill-formed variant: " + variant, errIdx); 425 } 426 } 427 428 // The input locale is validated at this point. 429 // Now, updating builder's internal fields. 430 this.language = language; 431 this.script = script; 432 this.region = region; 433 this.variant = variant; 434 clearExtensions(); 435 436 Set<Character> extKeys = (localeExtensions == null) ? null : localeExtensions.getKeys(); 437 if (extKeys != null) { 438 // map localeExtensions back to builder's internal format 439 for (Character key : extKeys) { 440 Extension e = localeExtensions.getExtension(key); 441 if (e instanceof UnicodeLocaleExtension) { 442 UnicodeLocaleExtension ue = (UnicodeLocaleExtension)e; 443 for (String uatr : ue.getUnicodeLocaleAttributes()) { 444 if (uattributes == null) { 445 uattributes = new HashSet<>(4); 446 } 447 uattributes.add(new CaseInsensitiveString(uatr)); 448 } 449 for (String ukey : ue.getUnicodeLocaleKeys()) { 450 if (ukeywords == null) { 451 ukeywords = new HashMap<>(4); 452 } 453 ukeywords.put(new CaseInsensitiveString(ukey), ue.getUnicodeLocaleType(ukey)); 454 } 455 } else { 456 if (extensions == null) { 457 extensions = new HashMap<>(4); 458 } 459 extensions.put(new CaseInsensitiveChar(key), e.getValue()); 460 } 461 } 462 } 463 return this; 464 } 465 466 public InternalLocaleBuilder clear() { 467 language = ""; 468 script = ""; 469 region = ""; 470 variant = ""; 471 clearExtensions(); 472 return this; 473 } 474 475 public InternalLocaleBuilder clearExtensions() { 476 if (extensions != null) { 477 extensions.clear(); 478 } 479 if (uattributes != null) { 480 uattributes.clear(); 481 } 482 if (ukeywords != null) { 483 ukeywords.clear(); 484 } 485 return this; 486 } 487 488 public BaseLocale getBaseLocale() { 489 String language = this.language; 490 String script = this.script; 491 String region = this.region; 492 String variant = this.variant; 493 494 // Special private use subtag sequence identified by "lvariant" will be 495 // interpreted as Java variant. 496 if (extensions != null) { 497 String privuse = extensions.get(PRIVATEUSE_KEY); 498 if (privuse != null) { 499 StringTokenIterator itr = new StringTokenIterator(privuse, LanguageTag.SEP); 500 boolean sawPrefix = false; 501 int privVarStart = -1; 502 while (!itr.isDone()) { 503 if (sawPrefix) { 504 privVarStart = itr.currentStart(); 505 break; 506 } 507 if (LocaleUtils.caseIgnoreMatch(itr.current(), LanguageTag.PRIVUSE_VARIANT_PREFIX)) { 508 sawPrefix = true; 509 } 510 itr.next(); 511 } 512 if (privVarStart != -1) { 513 StringBuilder sb = new StringBuilder(variant); 514 if (sb.length() != 0) { 515 sb.append(BaseLocale.SEP); 516 } 517 sb.append(privuse.substring(privVarStart).replaceAll(LanguageTag.SEP, 518 BaseLocale.SEP)); 519 variant = sb.toString(); 520 } 521 } 522 } 523 524 return BaseLocale.getInstance(language, script, region, variant); 525 } 526 527 public LocaleExtensions getLocaleExtensions() { 528 if (LocaleUtils.isEmpty(extensions) && LocaleUtils.isEmpty(uattributes) 529 && LocaleUtils.isEmpty(ukeywords)) { 530 return null; 531 } 532 533 LocaleExtensions lext = new LocaleExtensions(extensions, uattributes, ukeywords); 534 return lext.isEmpty() ? null : lext; 535 } 536 537 /* 538 * Remove special private use subtag sequence identified by "lvariant" 539 * and return the rest. Only used by LocaleExtensions 540 */ 541 static String removePrivateuseVariant(String privuseVal) { 542 StringTokenIterator itr = new StringTokenIterator(privuseVal, LanguageTag.SEP); 543 544 // Note: privateuse value "abc-lvariant" is unchanged 545 // because no subtags after "lvariant". 546 547 int prefixStart = -1; 548 boolean sawPrivuseVar = false; 549 while (!itr.isDone()) { 550 if (prefixStart != -1) { 551 // Note: privateuse value "abc-lvariant" is unchanged 552 // because no subtags after "lvariant". 553 sawPrivuseVar = true; 554 break; 555 } 556 if (LocaleUtils.caseIgnoreMatch(itr.current(), LanguageTag.PRIVUSE_VARIANT_PREFIX)) { 557 prefixStart = itr.currentStart(); 558 } 559 itr.next(); 560 } 561 if (!sawPrivuseVar) { 562 return privuseVal; 563 } 564 565 assert(prefixStart == 0 || prefixStart > 1); 566 return (prefixStart == 0) ? null : privuseVal.substring(0, prefixStart -1); 567 } 568 569 /* 570 * Check if the given variant subtags separated by the given 571 * separator(s) are valid 572 */ 573 private int checkVariants(String variants, String sep) { 574 StringTokenIterator itr = new StringTokenIterator(variants, sep); 575 while (!itr.isDone()) { 576 String s = itr.current(); 577 if (!LanguageTag.isVariant(s)) { 578 return itr.currentStart(); 579 } 580 itr.next(); 581 } 582 return -1; 583 } 584 585 /* 586 * Private methods parsing Unicode Locale Extension subtags. 587 * Duplicated attributes/keywords will be ignored. 588 * The input must be a valid extension subtags (excluding singleton). 589 */ 590 private void setUnicodeLocaleExtension(String subtags) { 591 // wipe out existing attributes/keywords 592 if (uattributes != null) { 593 uattributes.clear(); 594 } 595 if (ukeywords != null) { 596 ukeywords.clear(); 597 } 598 599 StringTokenIterator itr = new StringTokenIterator(subtags, LanguageTag.SEP); 600 601 // parse attributes 602 while (!itr.isDone()) { 603 if (!UnicodeLocaleExtension.isAttribute(itr.current())) { 604 break; 605 } 606 if (uattributes == null) { 607 uattributes = new HashSet<>(4); 608 } 609 uattributes.add(new CaseInsensitiveString(itr.current())); 610 itr.next(); 611 } 612 613 // parse keywords 614 CaseInsensitiveString key = null; 615 String type; 616 int typeStart = -1; 617 int typeEnd = -1; 618 while (!itr.isDone()) { 619 if (key != null) { 620 if (UnicodeLocaleExtension.isKey(itr.current())) { 621 // next keyword - emit previous one 622 assert(typeStart == -1 || typeEnd != -1); 623 type = (typeStart == -1) ? "" : subtags.substring(typeStart, typeEnd); 624 if (ukeywords == null) { 625 ukeywords = new HashMap<>(4); 626 } 627 ukeywords.put(key, type); 628 629 // reset keyword info 630 CaseInsensitiveString tmpKey = new CaseInsensitiveString(itr.current()); 631 key = ukeywords.containsKey(tmpKey) ? null : tmpKey; 632 typeStart = typeEnd = -1; 633 } else { 634 if (typeStart == -1) { 635 typeStart = itr.currentStart(); 636 } 637 typeEnd = itr.currentEnd(); 638 } 639 } else if (UnicodeLocaleExtension.isKey(itr.current())) { 640 // 1. first keyword or 641 // 2. next keyword, but previous one was duplicate 642 key = new CaseInsensitiveString(itr.current()); 643 if (ukeywords != null && ukeywords.containsKey(key)) { 644 // duplicate 645 key = null; 646 } 647 } 648 649 if (!itr.hasNext()) { 650 if (key != null) { 651 // last keyword 652 assert(typeStart == -1 || typeEnd != -1); 653 type = (typeStart == -1) ? "" : subtags.substring(typeStart, typeEnd); 654 if (ukeywords == null) { 655 ukeywords = new HashMap<>(4); 656 } 657 ukeywords.put(key, type); 658 } 659 break; 660 } 661 662 itr.next(); 663 } 664 } 665 666 static final class CaseInsensitiveString { 667 private final String str, lowerStr; 668 669 CaseInsensitiveString(String s) { 670 str = s; 671 lowerStr = LocaleUtils.toLowerString(s); 672 } 673 674 public String value() { 675 return str; 676 } 677 678 @Override 679 public int hashCode() { 680 return lowerStr.hashCode(); 681 } 682 683 @Override 684 public boolean equals(Object obj) { 685 if (this == obj) { 686 return true; 687 } 688 if (!(obj instanceof CaseInsensitiveString)) { 689 return false; 690 } 691 return lowerStr.equals(((CaseInsensitiveString)obj).lowerStr); 692 } 693 } 694 695 static final class CaseInsensitiveChar { 696 private final char ch, lowerCh; 697 698 /** 699 * Constructs a CaseInsensitiveChar with the first char of the 700 * given s. 701 */ 702 private CaseInsensitiveChar(String s) { 703 this(s.charAt(0)); 704 } 705 706 CaseInsensitiveChar(char c) { 707 ch = c; 708 lowerCh = LocaleUtils.toLower(ch); 709 } 710 711 public char value() { 712 return ch; 713 } 714 715 @Override 716 public int hashCode() { 717 return lowerCh; 718 } 719 720 @Override 721 public boolean equals(Object obj) { 722 if (this == obj) { 723 return true; 724 } 725 if (!(obj instanceof CaseInsensitiveChar)) { 726 return false; 727 } 728 return lowerCh == ((CaseInsensitiveChar)obj).lowerCh; 729 } 730 } 731 }