1 /*
   2  * Copyright (c) 1996, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package sun.net;
  27 
  28 /**
  29  * Helper class to map URL "abbreviations" to real URLs.
 * The default implementation supports the following mappings:
 *   ftp.mumble.bar/... => ftp://ftp.mumble.bar/...
 *   gopher.mumble.bar/... => gopher://gopher.mumble.bar/...
 *   other.name.dom/... => http://other.name.dom/...
 *   name => http://www.name.com   (a single name with no periods or slashes)
 *   /foo/... => file:/foo/...
  35  *
  36  * Full URLs (those including a protocol name) are passed through unchanged.
  37  *
  38  * Subclassers can override or extend this behavior to support different
  39  * or additional canonicalization policies.
  40  *
  41  * @author      Steve Byrne
  42  */
  43 
  44 public class URLCanonicalizer {
  45     /**
  46      * Creates the default canonicalizer instance.
  47      */
  48     public URLCanonicalizer() { }
  49 
  50     /**
  51      * Given a possibly abbreviated URL (missing a protocol name, typically),
  52      * this method's job is to transform that URL into a canonical form,
  53      * by including a protocol name and additional syntax, if necessary.
  54      *
  55      * For a correctly formed URL, this method should just return its argument.
  56      */
  57     public String canonicalize(String simpleURL) {
  58         String resultURL = simpleURL;
  59         if (simpleURL.startsWith("ftp.")) {
  60             resultURL = "ftp://" + simpleURL;
  61         } else if (simpleURL.startsWith("gopher.")) {
  62             resultURL = "gopher://" + simpleURL;
  63         } else if (simpleURL.startsWith("/")) {
  64             resultURL = "file:" + simpleURL;
  65         } else if (!hasProtocolName(simpleURL)) {
  66             if (isSimpleHostName(simpleURL)) {
  67                 simpleURL = "www." + simpleURL + ".com";
  68             }
  69             resultURL = "http://" + simpleURL;
  70         }
  71 
  72         return resultURL;
  73     }
  74 
  75     /**
  76      * Given a possibly abbreviated URL, this predicate function returns
  77      * true if it appears that the URL contains a protocol name
  78      */
  79     public boolean hasProtocolName(String url) {
  80         int index = url.indexOf(':');
  81         if (index <= 0) {       // treat ":foo" as not having a protocol spec
  82             return false;
  83         }
  84 
  85         for (int i = 0; i < index; i++) {
  86             char c = url.charAt(i);
  87 
  88             // REMIND: this is a guess at legal characters in a protocol --
  89             // need to be verified
  90             if ((c >= 'A' && c <= 'Z')
  91                 || (c >= 'a' && c <= 'z')
  92                 || (c == '-')) {
  93                 continue;
  94             }
  95 
  96             // found an illegal character
  97             return false;
  98         }
  99 
 100         return true;
 101     }
 102 
 103     /**
 104      * Returns true if the URL is just a single name, no periods or
 105      * slashes, false otherwise
 106      **/
 107     protected boolean isSimpleHostName(String url) {
 108 
 109         for (int i = 0; i < url.length(); i++) {
 110             char c = url.charAt(i);
 111 
 112             // REMIND: this is a guess at legal characters in a protocol --
 113             // need to be verified
 114             if ((c >= 'A' && c <= 'Z')
 115                 || (c >= 'a' && c <= 'z')
 116                 || (c >= '0' && c <= '9')
 117                 || (c == '-')) {
 118                 continue;
 119             }
 120 
 121             // found an illegal character
 122             return false;
 123         }
 124 
 125         return true;
 126     }
 127 }