1 /* 2 * Copyright (c) 2017, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package sun.security.util; 27 28 import java.io.BufferedReader; 29 import java.io.File; 30 import java.io.FileInputStream; 31 import java.io.FileNotFoundException; 32 import java.io.InputStream; 33 import java.io.InputStreamReader; 34 import java.io.IOException; 35 import java.security.AccessController; 36 import java.security.PrivilegedAction; 37 import java.util.Arrays; 38 import java.util.HashSet; 39 import java.util.Iterator; 40 import java.util.LinkedList; 41 import java.util.List; 42 import java.util.Map; 43 import java.util.Set; 44 import java.util.concurrent.ConcurrentHashMap; 45 import java.util.zip.ZipEntry; 46 import java.util.zip.ZipInputStream; 47 48 import sun.security.ssl.SSLLogger; 49 50 /** 51 * Allows public suffixes and registered domains to be determined from a 52 * string that represents a target domain name. A database of known 53 * registered suffixes is used to perform the determination. 54 * 55 * A public suffix is defined as the rightmost part of a domain name 56 * that is not owned by an individual registrant. Examples of 57 * public suffixes are: 58 * com 59 * edu 60 * co.uk 61 * k12.ak.us 62 * com.tw 63 * \u7db2\u8def.tw 64 * 65 * Public suffixes effectively denote registration authorities. 66 * 67 * A registered domain is a public suffix preceded by one domain label 68 * and a ".". Examples are: 69 * oracle.com 70 * mit.edu 71 * 72 * The internal database is derived from the information maintained at 73 * http://publicsuffix.org. The information is fixed for a particular 74 * JDK installation, but may be updated in future releases or updates. 75 * 76 * Because of the large number of top-level domains (TLDs) and public 77 * suffix rules, we only load the rules on demand -- from a Zip file 78 * containing an entry for each TLD. 79 * 80 * As each entry is loaded, its data is stored permanently in a cache. 81 * 82 * The containment hierarchy for the data is shown below: 83 * 84 * Rules --> contains all the rules for a particular TLD 85 * RuleSet --> contains all the rules that match 1 label 86 * RuleSet --> contains all the rules that match 2 labels 87 * RuleSet --> contains all the rules that match 3 labels 88 * : 89 * RuleSet --> contains all the rules that match N labels 90 * HashSet of rules, where each rule is an exception rule, a "normal" 91 * rule, a wildcard rule (rules that contain a wildcard prefix only), 92 * or a LinkedList of "other" rules 93 * 94 * The general matching algorithm tries to find a longest match. So, the 95 * search begins at the RuleSet with the most labels, and works backwards. 96 * 97 * Exceptions take priority over all other rules, and if a Rule contains 98 * any exceptions, then even if we find a "normal" match, we search all 99 * other RuleSets for exceptions. It is assumed that all other rules don't 100 * intersect/overlap. If this happens, a match will be returned, but not 101 * necessarily the expected one. For a further explanation of the rules, 102 * see http://publicsuffix.org/list/. 103 * 104 * The "other" rules are for the (possible future) case where wildcards 105 * are located in a rule any place other than the beginning. 106 */ 107 108 class DomainName { 109 /** 110 * For efficiency, the set of rules for each TLD is kept 111 * in text files and only loaded if needed. 112 */ 113 private static final Map<String, Rules> cache = new ConcurrentHashMap<>(); 114 115 private DomainName() {} 116 117 /** 118 * Returns the registered domain of the specified domain. 119 * 120 * @param domain the domain name 121 * @return the registered domain, or null if not known or not registerable 122 * @throws NullPointerException if domain is null 123 */ 124 public static RegisteredDomain registeredDomain(String domain) { 125 Match match = getMatch(domain); 126 return (match != null) ? match.registeredDomain() : null; 127 } 128 129 private static Match getMatch(String domain) { 130 if (domain == null) { 131 throw new NullPointerException(); 132 } 133 Rules rules = Rules.getRules(domain); 134 return rules == null ? null : rules.match(domain); 135 } 136 137 /** 138 * A Rules object contains a list of rules for a particular TLD. 139 * 140 * Rules are stored in a linked list of RuleSet objects. The list is 141 * indexed according to the number of labels in the name (minus one) 142 * such that all rules with the same number of labels are stored 143 * in the same RuleSet. 144 * 145 * Doing this means we can find the longest match first, and also we 146 * can stop comparing as soon as we find a match. 147 */ 148 private static class Rules { 149 150 private final LinkedList<RuleSet> ruleSets = new LinkedList<>(); 151 private final boolean hasExceptions; 152 153 private Rules(InputStream is) throws IOException { 154 InputStreamReader isr = new InputStreamReader(is, "UTF-8"); 155 BufferedReader reader = new BufferedReader(isr); 156 boolean hasExceptions = false; 157 158 String line; 159 int type = reader.read(); 160 while (type != -1 && (line = reader.readLine()) != null) { 161 int numLabels = RuleSet.numLabels(line); 162 if (numLabels != 0) { 163 RuleSet ruleset = getRuleSet(numLabels - 1); 164 ruleset.addRule(type, line); 165 hasExceptions |= ruleset.hasExceptions; 166 } 167 type = reader.read(); 168 } 169 this.hasExceptions = hasExceptions; 170 } 171 172 static Rules getRules(String domain) { 173 String tld = getTopLevelDomain(domain); 174 if (tld.isEmpty()) { 175 return null; 176 } 177 return cache.computeIfAbsent(tld, k -> createRules(tld)); 178 } 179 180 private static String getTopLevelDomain(String domain) { 181 int n = domain.lastIndexOf('.'); 182 if (n == -1) { 183 return domain; 184 } 185 return domain.substring(n + 1); 186 } 187 188 private static Rules createRules(String tld) { 189 try (InputStream pubSuffixStream = getPubSuffixStream()) { 190 if (pubSuffixStream == null) { 191 return null; 192 } 193 return getRules(tld, new ZipInputStream(pubSuffixStream)); 194 } catch (IOException e) { 195 if (SSLLogger.isOn && SSLLogger.isOn("ssl")) { 196 SSLLogger.fine( 197 "cannot parse public suffix data for " + tld + 198 ": " + e.getMessage()); 199 } 200 return null; 201 } 202 } 203 204 private static InputStream getPubSuffixStream() { 205 InputStream is = AccessController.doPrivileged( 206 new PrivilegedAction<>() { 207 @Override 208 public InputStream run() { 209 File f = new File(System.getProperty("java.home"), 210 "lib/security/public_suffix_list.dat"); 211 try { 212 return new FileInputStream(f); 213 } catch (FileNotFoundException e) { 214 return null; 215 } 216 } 217 } 218 ); 219 if (is == null) { 220 if (SSLLogger.isOn && SSLLogger.isOn("ssl") && 221 SSLLogger.isOn("trustmanager")) { 222 SSLLogger.fine( 223 "lib/security/public_suffix_list.dat not found"); 224 } 225 } 226 return is; 227 } 228 229 private static Rules getRules(String tld, 230 ZipInputStream zis) throws IOException { 231 boolean found = false; 232 ZipEntry ze = zis.getNextEntry(); 233 while (ze != null && !found) { 234 if (ze.getName().equals(tld)) { 235 found = true; 236 } else { 237 ze = zis.getNextEntry(); 238 } 239 } 240 if (!found) { 241 if (SSLLogger.isOn && SSLLogger.isOn("ssl")) { 242 SSLLogger.fine("Domain " + tld + " not found"); 243 } 244 return null; 245 } 246 return new Rules(zis); 247 } 248 249 /** 250 * Return the requested RuleSet. If it hasn't been created yet, 251 * create it and any RuleSets leading up to it. 252 */ 253 private RuleSet getRuleSet(int index) { 254 if (index < ruleSets.size()) { 255 return ruleSets.get(index); 256 } 257 RuleSet r = null; 258 for (int i = ruleSets.size(); i <= index; i++) { 259 r = new RuleSet(i + 1); 260 ruleSets.add(r); 261 } 262 return r; 263 } 264 265 /** 266 * Find a match for the target string. 267 */ 268 Match match(String domain) { 269 // Start at the end of the rules list, looking for longest match. 270 // After we find a normal match, we only look for exceptions. 271 Match possibleMatch = null; 272 273 Iterator<RuleSet> it = ruleSets.descendingIterator(); 274 while (it.hasNext()) { 275 RuleSet ruleSet = it.next(); 276 Match match = ruleSet.match(domain); 277 if (match != null) { 278 if (match.type() == Rule.Type.EXCEPTION || !hasExceptions) { 279 return match; 280 } 281 if (possibleMatch == null) { 282 possibleMatch = match; 283 } 284 } 285 } 286 return possibleMatch; 287 } 288 289 /** 290 * Represents a set of rules with the same number of labels 291 * and for a particular TLD. 292 * 293 * Examples: 294 * numLabels = 2 295 * names: co.uk, ac.uk 296 * wildcards *.de (only "de" stored in HashSet) 297 * exceptions: !foo.de (stored as "foo.de") 298 */ 299 private static class RuleSet { 300 // the number of labels in this ruleset 301 private final int numLabels; 302 private final Set<Rule> rules = new HashSet<>(); 303 boolean hasExceptions = false; 304 private static final RegisteredDomain.Type[] AUTHS = 305 RegisteredDomain.Type.values(); 306 307 RuleSet(int n) { 308 numLabels = n; 309 } 310 311 void addRule(int auth, String rule) { 312 if (rule.startsWith("!")) { 313 rules.add(new Rule(rule.substring(1), Rule.Type.EXCEPTION, 314 AUTHS[auth])); 315 hasExceptions = true; 316 } else if (rule.startsWith("*.") && 317 rule.lastIndexOf('*') == 0) { 318 rules.add(new Rule(rule.substring(2), Rule.Type.WILDCARD, 319 AUTHS[auth])); 320 } else if (rule.indexOf('*') == -1) { 321 // a "normal" label 322 rules.add(new Rule(rule, Rule.Type.NORMAL, AUTHS[auth])); 323 } else { 324 // There is a wildcard in a non-leading label. This case 325 // doesn't currently exist, but we need to handle it anyway. 326 rules.add(new OtherRule(rule, AUTHS[auth], split(rule))); 327 } 328 } 329 330 Match match(String domain) { 331 Match match = null; 332 for (Rule rule : rules) { 333 switch (rule.type) { 334 case NORMAL: 335 if (match == null) { 336 match = matchNormal(domain, rule); 337 } 338 break; 339 case WILDCARD: 340 if (match == null) { 341 match = matchWildcard(domain, rule); 342 } 343 break; 344 case OTHER: 345 if (match == null) { 346 match = matchOther(domain, rule); 347 } 348 break; 349 case EXCEPTION: 350 Match excMatch = matchException(domain, rule); 351 if (excMatch != null) { 352 return excMatch; 353 } 354 break; 355 } 356 } 357 return match; 358 } 359 360 private static LinkedList<String> split(String rule) { 361 String[] labels = rule.split("\\."); 362 return new LinkedList<>(Arrays.asList(labels)); 363 } 364 365 private static int numLabels(String rule) { 366 if (rule.equals("")) { 367 return 0; 368 } 369 int len = rule.length(); 370 int count = 0; 371 int index = 0; 372 while (index < len) { 373 int pos; 374 if ((pos = rule.indexOf('.', index)) == -1) { 375 return count + 1; 376 } 377 index = pos + 1; 378 count++; 379 } 380 return count; 381 } 382 383 /** 384 * Check for a match with an explicit name rule or a wildcard rule 385 * (i.e., a non-exception rule). 386 */ 387 private Match matchNormal(String domain, Rule rule) { 388 int index = labels(domain, numLabels); 389 if (index == -1) { 390 return null; 391 } 392 393 // Check for explicit names. 394 String substring = domain.substring(index); 395 if (rule.domain.equals(substring)) { 396 return new CommonMatch(domain, rule, index); 397 } 398 399 return null; 400 } 401 402 private Match matchWildcard(String domain, Rule rule) { 403 // Now check for wildcards. In this case, there is one fewer 404 // label than numLabels. 405 int index = labels(domain, numLabels - 1); 406 if (index > 0) { 407 String substring = domain.substring(index); 408 409 if (rule.domain.equals(substring)) { 410 return new CommonMatch(domain, rule, 411 labels(domain, numLabels)); 412 } 413 } 414 415 return null; 416 } 417 418 /** 419 * Check for a match with an exception rule. 420 */ 421 private Match matchException(String domain, Rule rule) { 422 int index = labels(domain, numLabels); 423 if (index == -1) { 424 return null; 425 } 426 String substring = domain.substring(index); 427 428 if (rule.domain.equals(substring)) { 429 return new CommonMatch(domain, rule, 430 labels(domain, numLabels - 1)); 431 } 432 433 return null; 434 } 435 436 /** 437 * A left-to-right comparison of labels. 438 * The simplest approach to doing match() would be to 439 * use a descending iterator giving a right-to-left comparison. 440 * But, it's more efficient to do left-to-right compares 441 * because the left most labels are the ones most likely to be 442 * different. We just have to figure out which label to start at. 443 */ 444 private Match matchOther(String domain, Rule rule) { 445 OtherRule otherRule = (OtherRule)rule; 446 LinkedList<String> target = split(domain); 447 448 int diff = target.size() - numLabels; 449 if (diff < 0) { 450 return null; 451 } 452 453 boolean found = true; 454 for (int i = 0; i < numLabels; i++) { 455 String ruleLabel = otherRule.labels.get(i); 456 String targetLabel = target.get(i + diff); 457 458 if (ruleLabel.charAt(0) != '*' && 459 !ruleLabel.equalsIgnoreCase(targetLabel)) { 460 found = false; 461 break; 462 } 463 } 464 if (found) { 465 return new OtherMatch(rule, numLabels, target); 466 } 467 return null; 468 } 469 470 /** 471 * Returns a substring (index) with the n right-most labels from s. 472 * Returns -1 if s does not have at least n labels, 0, if the 473 * substring is s. 474 */ 475 private static int labels(String s, int n) { 476 if (n < 1) { 477 return -1; 478 } 479 int index = s.length(); 480 for (int i = 0; i < n; i++) { 481 int next = s.lastIndexOf('.', index); 482 if (next == -1) { 483 if (i == n - 1) { 484 return 0; 485 } else { 486 return -1; 487 } 488 } 489 index = next - 1; 490 } 491 return index + 2; 492 } 493 } 494 } 495 496 private static class Rule { 497 enum Type { EXCEPTION, NORMAL, OTHER, WILDCARD } 498 499 String domain; 500 Type type; 501 RegisteredDomain.Type auth; 502 Rule(String domain, Type type, RegisteredDomain.Type auth) { 503 this.domain = domain; 504 this.type = type; 505 this.auth = auth; 506 } 507 } 508 509 private static class OtherRule extends Rule { 510 List<String> labels; 511 OtherRule(String domain, RegisteredDomain.Type auth, 512 List<String> labels) { 513 super(domain, Type.OTHER, auth); 514 this.labels = labels; 515 } 516 } 517 518 /** 519 * Represents a string's match with a rule in the public suffix list. 520 */ 521 private interface Match { 522 RegisteredDomain registeredDomain(); 523 Rule.Type type(); 524 } 525 526 private static class RegisteredDomainImpl implements RegisteredDomain { 527 private final String name; 528 private final Type type; 529 private final String publicSuffix; 530 RegisteredDomainImpl(String name, Type type, String publicSuffix) { 531 this.name = name; 532 this.type = type; 533 this.publicSuffix = publicSuffix; 534 } 535 @Override 536 public String name() { 537 return name; 538 } 539 @Override 540 public Type type() { 541 return type; 542 } 543 @Override 544 public String publicSuffix() { 545 return publicSuffix; 546 } 547 } 548 549 /** 550 * Represents a match against a standard rule in the public suffix list. 551 * A standard rule is an explicit name, a wildcard rule with a wildcard 552 * only in the leading label, or an exception rule. 553 */ 554 private static class CommonMatch implements Match { 555 private String domain; 556 private int publicSuffix; // index to 557 private int registeredDomain; // index to 558 private final Rule rule; 559 560 CommonMatch(String domain, Rule rule, int publicSuffix) { 561 this.domain = domain; 562 this.publicSuffix = publicSuffix; 563 this.rule = rule; 564 // now locate the previous label 565 registeredDomain = domain.lastIndexOf('.', publicSuffix - 2); 566 if (registeredDomain == -1) { 567 registeredDomain = 0; 568 } else { 569 registeredDomain++; 570 } 571 } 572 573 @Override 574 public RegisteredDomain registeredDomain() { 575 if (publicSuffix == 0) { 576 return null; 577 } 578 return new RegisteredDomainImpl(domain.substring(registeredDomain), 579 rule.auth, 580 domain.substring(publicSuffix)); 581 } 582 583 @Override 584 public Rule.Type type() { 585 return rule.type; 586 } 587 } 588 589 /** 590 * Represents a non-match with {@code NO_MATCH} or a match against 591 * a non-standard rule in the public suffix list. A non-standard rule 592 * is a wildcard rule that includes wildcards in a label other than 593 * the leading label. The public suffix list doesn't currently have 594 * such rules. 595 */ 596 private static class OtherMatch implements Match { 597 private final Rule rule; 598 private final int numLabels; 599 private final LinkedList<String> target; 600 601 OtherMatch(Rule rule, int numLabels, LinkedList<String> target) { 602 this.rule = rule; 603 this.numLabels = numLabels; 604 this.target = target; 605 } 606 607 @Override 608 public RegisteredDomain registeredDomain() { 609 int nlabels = numLabels + 1; 610 if (nlabels > target.size()) { 611 // special case when registered domain is same as pub suff 612 return null; 613 } 614 return new RegisteredDomainImpl(getSuffixes(nlabels), 615 rule.auth, getSuffixes(numLabels)); 616 } 617 618 @Override 619 public Rule.Type type() { 620 return rule.type; 621 } 622 623 private String getSuffixes(int n) { 624 Iterator<String> targetIter = target.descendingIterator(); 625 StringBuilder sb = new StringBuilder(); 626 while (n > 0 && targetIter.hasNext()) { 627 String s = targetIter.next(); 628 sb.insert(0, s); 629 if (n > 1) { 630 sb.insert(0, '.'); 631 } 632 n--; 633 } 634 return sb.toString(); 635 } 636 } 637 }