1 /*
   2  * Copyright (c) 2017, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package sun.security.util;
  27 
  28 import java.io.BufferedReader;
  29 import java.io.File;
  30 import java.io.FileInputStream;
  31 import java.io.FileNotFoundException;
  32 import java.io.InputStream;
  33 import java.io.InputStreamReader;
  34 import java.io.IOException;
  35 import java.security.AccessController;
  36 import java.security.PrivilegedAction;
  37 import java.util.Arrays;
  38 import java.util.HashSet;
  39 import java.util.Iterator;
  40 import java.util.LinkedList;
  41 import java.util.List;
  42 import java.util.Map;
  43 import java.util.Set;
  44 import java.util.concurrent.ConcurrentHashMap;
  45 import java.util.zip.ZipEntry;
  46 import java.util.zip.ZipInputStream;
  47 
  48 import sun.security.ssl.SSLLogger;
  49 
  50 /**
  51  * Allows public suffixes and registered domains to be determined from a
  52  * string that represents a target domain name. A database of known
  53  * registered suffixes is used to perform the determination.
  54  *
  55  * A public suffix is defined as the rightmost part of a domain name
  56  * that is not owned by an individual registrant. Examples of
  57  * public suffixes are:
  58  *      com
  59  *      edu
  60  *      co.uk
  61  *      k12.ak.us
  62  *      com.tw
  63  *      \u7db2\u8def.tw
  64  *
  65  * Public suffixes effectively denote registration authorities.
  66  *
  67  * A registered domain is a public suffix preceded by one domain label
  68  * and a ".". Examples are:
  69  *      oracle.com
  70  *      mit.edu
  71  *
  72  * The internal database is derived from the information maintained at
  73  * http://publicsuffix.org. The information is fixed for a particular
  74  * JDK installation, but may be updated in future releases or updates.
  75  *
  76  * Because of the large number of top-level domains (TLDs) and public
  77  * suffix rules, we only load the rules on demand -- from a Zip file
  78  * containing an entry for each TLD.
  79  *
  80  * As each entry is loaded, its data is stored permanently in a cache.
  81  *
  82  * The containment hierarchy for the data is shown below:
  83  *
  84  * Rules --> contains all the rules for a particular TLD
  85  *    RuleSet --> contains all the rules that match 1 label
  86  *    RuleSet --> contains all the rules that match 2 labels
  87  *    RuleSet --> contains all the rules that match 3 labels
  88  *      :
  89  *    RuleSet --> contains all the rules that match N labels
  90  *      HashSet of rules, where each rule is an exception rule, a "normal"
  91  *      rule, a wildcard rule (rules that contain a wildcard prefix only),
  92  *      or a LinkedList of "other" rules
  93  *
  94  * The general matching algorithm tries to find a longest match. So, the
  95  * search begins at the RuleSet with the most labels, and works backwards.
  96  *
  97  * Exceptions take priority over all other rules, and if a Rule contains
  98  * any exceptions, then even if we find a "normal" match, we search all
  99  * other RuleSets for exceptions. It is assumed that all other rules don't
 100  * intersect/overlap. If this happens, a match will be returned, but not
 101  * necessarily the expected one. For a further explanation of the rules,
 102  * see http://publicsuffix.org/list/.
 103  *
 104  * The "other" rules are for the (possible future) case where wildcards
 105  * are located in a rule any place other than the beginning.
 106  */
 107 
 108 class DomainName {
 109     /**
 110      * For efficiency, the set of rules for each TLD is kept
 111      * in text files and only loaded if needed.
 112      */
 113     private static final Map<String, Rules> cache = new ConcurrentHashMap<>();
 114 
 115     private DomainName() {}
 116 
 117     /**
 118      * Returns the registered domain of the specified domain.
 119      *
 120      * @param domain the domain name
 121      * @return the registered domain, or null if not known or not registerable
 122      * @throws NullPointerException if domain is null
 123      */
 124     public static RegisteredDomain registeredDomain(String domain) {
 125         Match match = getMatch(domain);
 126         return (match != null) ? match.registeredDomain() : null;
 127     }
 128 
 129     private static Match getMatch(String domain) {
 130         if (domain == null) {
 131             throw new NullPointerException();
 132         }
 133         Rules rules = Rules.getRules(domain);
 134         return rules == null ? null : rules.match(domain);
 135     }
 136 
 137     /**
 138      * A Rules object contains a list of rules for a particular TLD.
 139      *
 140      * Rules are stored in a linked list of RuleSet objects. The list is
 141      * indexed according to the number of labels in the name (minus one)
 142      * such that all rules with the same number of labels are stored
 143      * in the same RuleSet.
 144      *
 145      * Doing this means we can find the longest match first, and also we
 146      * can stop comparing as soon as we find a match.
 147      */
 148     private static class Rules {
 149 
 150         private final LinkedList<RuleSet> ruleSets = new LinkedList<>();
 151         private final boolean hasExceptions;
 152 
 153         private Rules(InputStream is) throws IOException {
 154             InputStreamReader isr = new InputStreamReader(is, "UTF-8");
 155             BufferedReader reader = new BufferedReader(isr);
 156             boolean hasExceptions = false;
 157 
 158             String line;
 159             int type = reader.read();
 160             while (type != -1 && (line = reader.readLine()) != null) {
 161                 int numLabels = RuleSet.numLabels(line);
 162                 if (numLabels != 0) {
 163                     RuleSet ruleset = getRuleSet(numLabels - 1);
 164                     ruleset.addRule(type, line);
 165                     hasExceptions |= ruleset.hasExceptions;
 166                 }
 167                 type = reader.read();
 168             }
 169             this.hasExceptions = hasExceptions;
 170         }
 171 
 172         static Rules getRules(String domain) {
 173             String tld = getTopLevelDomain(domain);
 174             if (tld.isEmpty()) {
 175                 return null;
 176             }
 177             return cache.computeIfAbsent(tld, k -> createRules(tld));
 178         }
 179 
 180         private static String getTopLevelDomain(String domain) {
 181             int n = domain.lastIndexOf('.');
 182             if (n == -1) {
 183                 return domain;
 184             }
 185             return domain.substring(n + 1);
 186         }
 187 
 188         private static Rules createRules(String tld) {
 189             try (InputStream pubSuffixStream = getPubSuffixStream()) {
 190                 if (pubSuffixStream == null) {
 191                     return null;
 192                 }
 193                 return getRules(tld, new ZipInputStream(pubSuffixStream));
 194             } catch (IOException e) {
 195                 if (SSLLogger.isOn && SSLLogger.isOn("ssl")) {
 196                     SSLLogger.fine(
 197                         "cannot parse public suffix data for " + tld +
 198                          ": " + e.getMessage());
 199                 }
 200                 return null;
 201             }
 202         }
 203 
 204         private static InputStream getPubSuffixStream() {
 205             InputStream is = AccessController.doPrivileged(
 206                 new PrivilegedAction<>() {
 207                     @Override
 208                     public InputStream run() {
 209                         File f = new File(System.getProperty("java.home"),
 210                             "lib/security/public_suffix_list.dat");
 211                         try {
 212                             return new FileInputStream(f);
 213                         } catch (FileNotFoundException e) {
 214                             return null;
 215                         }
 216                     }
 217                 }
 218             );
 219             if (is == null) {
 220                 if (SSLLogger.isOn && SSLLogger.isOn("ssl") &&
 221                         SSLLogger.isOn("trustmanager")) {
 222                     SSLLogger.fine(
 223                         "lib/security/public_suffix_list.dat not found");
 224                 }
 225             }
 226             return is;
 227         }
 228 
 229         private static Rules getRules(String tld,
 230                                       ZipInputStream zis) throws IOException {
 231             boolean found = false;
 232             ZipEntry ze = zis.getNextEntry();
 233             while (ze != null && !found) {
 234                 if (ze.getName().equals(tld)) {
 235                     found = true;
 236                 } else {
 237                     ze = zis.getNextEntry();
 238                 }
 239             }
 240             if (!found) {
 241                 if (SSLLogger.isOn && SSLLogger.isOn("ssl")) {
 242                     SSLLogger.fine("Domain " + tld + " not found");
 243                 }
 244                 return null;
 245             }
 246             return new Rules(zis);
 247         }
 248 
 249         /**
 250          * Return the requested RuleSet. If it hasn't been created yet,
 251          * create it and any RuleSets leading up to it.
 252          */
 253         private RuleSet getRuleSet(int index) {
 254             if (index < ruleSets.size()) {
 255                 return ruleSets.get(index);
 256             }
 257             RuleSet r = null;
 258             for (int i = ruleSets.size(); i <= index; i++) {
 259                 r = new RuleSet(i + 1);
 260                 ruleSets.add(r);
 261             }
 262             return r;
 263         }
 264 
 265         /**
 266          * Find a match for the target string.
 267          */
 268         Match match(String domain) {
 269             // Start at the end of the rules list, looking for longest match.
 270             // After we find a normal match, we only look for exceptions.
 271             Match possibleMatch = null;
 272 
 273             Iterator<RuleSet> it = ruleSets.descendingIterator();
 274             while (it.hasNext()) {
 275                 RuleSet ruleSet = it.next();
 276                 Match match = ruleSet.match(domain);
 277                 if (match != null) {
 278                     if (match.type() == Rule.Type.EXCEPTION || !hasExceptions) {
 279                         return match;
 280                     }
 281                     if (possibleMatch == null) {
 282                         possibleMatch = match;
 283                     }
 284                 }
 285             }
 286             return possibleMatch;
 287         }
 288 
 289         /**
 290          * Represents a set of rules with the same number of labels
 291          * and for a particular TLD.
 292          *
 293          * Examples:
 294          *      numLabels = 2
 295          *      names: co.uk, ac.uk
 296          *      wildcards *.de (only "de" stored in HashSet)
 297          *      exceptions: !foo.de (stored as "foo.de")
 298          */
 299         private static class RuleSet {
 300             // the number of labels in this ruleset
 301             private final int numLabels;
 302             private final Set<Rule> rules = new HashSet<>();
 303             boolean hasExceptions = false;
 304             private static final RegisteredDomain.Type[] AUTHS =
 305                 RegisteredDomain.Type.values();
 306 
 307             RuleSet(int n) {
 308                 numLabels = n;
 309             }
 310 
 311             void addRule(int auth, String rule) {
 312                 if (rule.startsWith("!")) {
 313                     rules.add(new Rule(rule.substring(1), Rule.Type.EXCEPTION,
 314                                        AUTHS[auth]));
 315                     hasExceptions = true;
 316                 } else if (rule.startsWith("*.") &&
 317                            rule.lastIndexOf('*') == 0) {
 318                     rules.add(new Rule(rule.substring(2), Rule.Type.WILDCARD,
 319                                        AUTHS[auth]));
 320                 } else if (rule.indexOf('*') == -1) {
 321                     // a "normal" label
 322                     rules.add(new Rule(rule, Rule.Type.NORMAL, AUTHS[auth]));
 323                 } else {
 324                     // There is a wildcard in a non-leading label. This case
 325                     // doesn't currently exist, but we need to handle it anyway.
 326                     rules.add(new OtherRule(rule, AUTHS[auth], split(rule)));
 327                 }
 328             }
 329 
 330             Match match(String domain) {
 331                 Match match = null;
 332                 for (Rule rule : rules) {
 333                     switch (rule.type) {
 334                         case NORMAL:
 335                             if (match == null) {
 336                                 match = matchNormal(domain, rule);
 337                             }
 338                             break;
 339                         case WILDCARD:
 340                             if (match == null) {
 341                                 match = matchWildcard(domain, rule);
 342                             }
 343                             break;
 344                         case OTHER:
 345                             if (match == null) {
 346                                 match = matchOther(domain, rule);
 347                             }
 348                             break;
 349                         case EXCEPTION:
 350                             Match excMatch = matchException(domain, rule);
 351                             if (excMatch != null) {
 352                                 return excMatch;
 353                             }
 354                             break;
 355                     }
 356                 }
 357                 return match;
 358             }
 359 
 360             private static LinkedList<String> split(String rule) {
 361                 String[] labels = rule.split("\\.");
 362                 return new LinkedList<>(Arrays.asList(labels));
 363             }
 364 
 365             private static int numLabels(String rule) {
 366                 if (rule.equals("")) {
 367                     return 0;
 368                 }
 369                 int len = rule.length();
 370                 int count = 0;
 371                 int index = 0;
 372                 while (index < len) {
 373                     int pos;
 374                     if ((pos = rule.indexOf('.', index)) == -1) {
 375                         return count + 1;
 376                     }
 377                     index = pos + 1;
 378                     count++;
 379                 }
 380                 return count;
 381             }
 382 
 383             /**
 384              * Check for a match with an explicit name rule or a wildcard rule
 385              * (i.e., a non-exception rule).
 386              */
 387             private Match matchNormal(String domain, Rule rule) {
 388                 int index = labels(domain, numLabels);
 389                 if (index == -1) {
 390                     return null;
 391                 }
 392 
 393                 // Check for explicit names.
 394                 String substring = domain.substring(index);
 395                 if (rule.domain.equals(substring)) {
 396                     return new CommonMatch(domain, rule, index);
 397                 }
 398 
 399                 return null;
 400             }
 401 
 402             private Match matchWildcard(String domain, Rule rule) {
 403                 // Now check for wildcards. In this case, there is one fewer
 404                 // label than numLabels.
 405                 int index = labels(domain, numLabels - 1);
 406                 if (index > 0) {
 407                     String substring = domain.substring(index);
 408 
 409                     if (rule.domain.equals(substring)) {
 410                         return new CommonMatch(domain, rule,
 411                                                labels(domain, numLabels));
 412                     }
 413                 }
 414 
 415                 return null;
 416             }
 417 
 418             /**
 419              * Check for a match with an exception rule.
 420              */
 421             private Match matchException(String domain, Rule rule) {
 422                 int index = labels(domain, numLabels);
 423                 if (index == -1) {
 424                     return null;
 425                 }
 426                 String substring = domain.substring(index);
 427 
 428                 if (rule.domain.equals(substring)) {
 429                     return new CommonMatch(domain, rule,
 430                                            labels(domain, numLabels - 1));
 431                 }
 432 
 433                 return null;
 434             }
 435 
 436             /**
 437              * A left-to-right comparison of labels.
 438              * The simplest approach to doing match() would be to
 439              * use a descending iterator giving a right-to-left comparison.
 440              * But, it's more efficient to do left-to-right compares
 441              * because the left most labels are the ones most likely to be
 442              * different. We just have to figure out which label to start at.
 443              */
 444             private Match matchOther(String domain, Rule rule) {
 445                 OtherRule otherRule = (OtherRule)rule;
 446                 LinkedList<String> target = split(domain);
 447 
 448                 int diff = target.size() - numLabels;
 449                 if (diff < 0) {
 450                     return null;
 451                 }
 452 
 453                 boolean found = true;
 454                 for (int i = 0; i < numLabels; i++) {
 455                     String ruleLabel = otherRule.labels.get(i);
 456                     String targetLabel = target.get(i + diff);
 457 
 458                     if (ruleLabel.charAt(0) != '*' &&
 459                         !ruleLabel.equalsIgnoreCase(targetLabel)) {
 460                         found = false;
 461                         break;
 462                     }
 463                 }
 464                 if (found) {
 465                     return new OtherMatch(rule, numLabels, target);
 466                 }
 467                 return null;
 468             }
 469 
 470             /**
 471              * Returns a substring (index) with the n right-most labels from s.
 472              * Returns -1 if s does not have at least n labels, 0, if the
 473              * substring is s.
 474              */
 475             private static int labels(String s, int n) {
 476                 if (n < 1) {
 477                     return -1;
 478                 }
 479                 int index = s.length();
 480                 for (int i = 0; i < n; i++) {
 481                     int next = s.lastIndexOf('.', index);
 482                     if (next == -1) {
 483                         if (i == n - 1) {
 484                             return 0;
 485                         } else {
 486                             return -1;
 487                         }
 488                     }
 489                     index = next - 1;
 490                 }
 491                 return index + 2;
 492             }
 493         }
 494     }
 495 
 496     private static class Rule {
 497         enum Type { EXCEPTION, NORMAL, OTHER, WILDCARD }
 498 
 499         String domain;
 500         Type type;
 501         RegisteredDomain.Type auth;
 502         Rule(String domain, Type type, RegisteredDomain.Type auth) {
 503             this.domain = domain;
 504             this.type = type;
 505             this.auth = auth;
 506         }
 507     }
 508 
 509     private static class OtherRule extends Rule {
 510         List<String> labels;
 511         OtherRule(String domain, RegisteredDomain.Type auth,
 512                   List<String> labels) {
 513             super(domain, Type.OTHER, auth);
 514             this.labels = labels;
 515         }
 516     }
 517 
 518     /**
 519      * Represents a string's match with a rule in the public suffix list.
 520      */
 521     private interface Match {
 522         RegisteredDomain registeredDomain();
 523         Rule.Type type();
 524     }
 525 
 526     private static class RegisteredDomainImpl implements RegisteredDomain {
 527         private final String name;
 528         private final Type type;
 529         private final String publicSuffix;
 530         RegisteredDomainImpl(String name, Type type, String publicSuffix) {
 531             this.name = name;
 532             this.type = type;
 533             this.publicSuffix = publicSuffix;
 534         }
 535         @Override
 536         public String name() {
 537             return name;
 538         }
 539         @Override
 540         public Type type() {
 541             return type;
 542         }
 543         @Override
 544         public String publicSuffix() {
 545             return publicSuffix;
 546         }
 547     }
 548 
 549     /**
 550      * Represents a match against a standard rule in the public suffix list.
 551      * A standard rule is an explicit name, a wildcard rule with a wildcard
 552      * only in the leading label, or an exception rule.
 553      */
 554     private static class CommonMatch implements Match {
 555         private String domain;
 556         private int publicSuffix; // index to
 557         private int registeredDomain; // index to
 558         private final Rule rule;
 559 
 560         CommonMatch(String domain, Rule rule, int publicSuffix) {
 561             this.domain = domain;
 562             this.publicSuffix = publicSuffix;
 563             this.rule = rule;
 564             // now locate the previous label
 565             registeredDomain = domain.lastIndexOf('.', publicSuffix - 2);
 566             if (registeredDomain == -1) {
 567                 registeredDomain = 0;
 568             } else {
 569                 registeredDomain++;
 570             }
 571         }
 572 
 573         @Override
 574         public RegisteredDomain registeredDomain() {
 575             if (publicSuffix == 0) {
 576                 return null;
 577             }
 578             return new RegisteredDomainImpl(domain.substring(registeredDomain),
 579                                             rule.auth,
 580                                             domain.substring(publicSuffix));
 581         }
 582 
 583         @Override
 584         public Rule.Type type() {
 585             return rule.type;
 586         }
 587     }
 588 
 589     /**
 590      * Represents a non-match with {@code NO_MATCH} or a match against
 591      * a non-standard rule in the public suffix list. A non-standard rule
 592      * is a wildcard rule that includes wildcards in a label other than
 593      * the leading label. The public suffix list doesn't currently have
 594      * such rules.
 595      */
 596     private static class OtherMatch implements Match {
 597         private final Rule rule;
 598         private final int numLabels;
 599         private final LinkedList<String> target;
 600 
 601         OtherMatch(Rule rule, int numLabels, LinkedList<String> target) {
 602             this.rule = rule;
 603             this.numLabels = numLabels;
 604             this.target = target;
 605         }
 606 
 607         @Override
 608         public RegisteredDomain registeredDomain() {
 609             int nlabels = numLabels + 1;
 610             if (nlabels > target.size()) {
 611                 // special case when registered domain is same as pub suff
 612                 return null;
 613             }
 614             return new RegisteredDomainImpl(getSuffixes(nlabels),
 615                                             rule.auth, getSuffixes(numLabels));
 616         }
 617 
 618         @Override
 619         public Rule.Type type() {
 620             return rule.type;
 621         }
 622 
 623         private String getSuffixes(int n) {
 624             Iterator<String> targetIter = target.descendingIterator();
 625             StringBuilder sb = new StringBuilder();
 626             while (n > 0 && targetIter.hasNext()) {
 627                 String s = targetIter.next();
 628                 sb.insert(0, s);
 629                 if (n > 1) {
 630                     sb.insert(0, '.');
 631                 }
 632                 n--;
 633             }
 634             return sb.toString();
 635         }
 636     }
 637 }