1 /* 2 * Copyright (c) 1996, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package sun.net; 27 28 /** 29 * Helper class to map URL "abbreviations" to real URLs. 30 * The default implementation supports the following mappings: 31 * ftp.mumble.bar/... => ftp://ftp.mumble.bar/... 32 * gopher.mumble.bar/... => gopher://gopher.mumble.bar/... 33 * other.name.dom/... => http://other.name.dom/... 34 * /foo/... => file:/foo/... 35 * 36 * Full URLs (those including a protocol name) are passed through unchanged. 37 * 38 * Subclassers can override or extend this behavior to support different 39 * or additional canonicalization policies. 40 * 41 * @author Steve Byrne 42 */ 43 44 public class URLCanonicalizer { 45 /** 46 * Creates the default canonicalizer instance. 47 */ 48 public URLCanonicalizer() { } 49 50 /** 51 * Given a possibly abbreviated URL (missing a protocol name, typically), 52 * this method's job is to transform that URL into a canonical form, 53 * by including a protocol name and additional syntax, if necessary. 54 * 55 * For a correctly formed URL, this method should just return its argument. 56 */ 57 public String canonicalize(String simpleURL) { 58 String resultURL = simpleURL; 59 if (simpleURL.startsWith("ftp.")) { 60 resultURL = "ftp://" + simpleURL; 61 } else if (simpleURL.startsWith("gopher.")) { 62 resultURL = "gopher://" + simpleURL; 63 } else if (simpleURL.startsWith("/")) { 64 resultURL = "file:" + simpleURL; 65 } else if (!hasProtocolName(simpleURL)) { 66 if (isSimpleHostName(simpleURL)) { 67 simpleURL = "www." + simpleURL + ".com"; 68 } 69 resultURL = "http://" + simpleURL; 70 } 71 72 return resultURL; 73 } 74 75 /** 76 * Given a possibly abbreviated URL, this predicate function returns 77 * true if it appears that the URL contains a protocol name 78 */ 79 public boolean hasProtocolName(String url) { 80 int index = url.indexOf(':'); 81 if (index <= 0) { // treat ":foo" as not having a protocol spec 82 return false; 83 } 84 85 for (int i = 0; i < index; i++) { 86 char c = url.charAt(i); 87 88 // REMIND: this is a guess at legal characters in a protocol -- 89 // need to be verified 90 if ((c >= 'A' && c <= 'Z') 91 || (c >= 'a' && c <= 'z') 92 || (c == '-')) { 93 continue; 94 } 95 96 // found an illegal character 97 return false; 98 } 99 100 return true; 101 } 102 103 /** 104 * Returns true if the URL is just a single name, no periods or 105 * slashes, false otherwise 106 **/ 107 protected boolean isSimpleHostName(String url) { 108 109 for (int i = 0; i < url.length(); i++) { 110 char c = url.charAt(i); 111 112 // REMIND: this is a guess at legal characters in a protocol -- 113 // need to be verified 114 if ((c >= 'A' && c <= 'Z') 115 || (c >= 'a' && c <= 'z') 116 || (c >= '0' && c <= '9') 117 || (c == '-')) { 118 continue; 119 } 120 121 // found an illegal character 122 return false; 123 } 124 125 return true; 126 } 127 }