1 /*
   2  * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2011, 2012, 2013 Apple Inc. All rights reserved.
   3  *
   4  * Redistribution and use in source and binary forms, with or without
   5  * modification, are permitted provided that the following conditions
   6  * are met:
   7  * 1. Redistributions of source code must retain the above copyright
   8  *    notice, this list of conditions and the following disclaimer.
   9  * 2. Redistributions in binary form must reproduce the above copyright
  10  *    notice, this list of conditions and the following disclaimer in the
  11  *    documentation and/or other materials provided with the distribution.
  12  *
  13  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
  14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
  17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
  24  */
  25 
  26 #ifndef URL_h
  27 #define URL_h
  28 
  29 #include <wtf/Forward.h>
  30 #include <wtf/HashMap.h>
  31 #include <wtf/RetainPtr.h>
  32 #include <wtf/text/WTFString.h>
  33 
  34 #if USE(CF)
  35 typedef const struct __CFURL* CFURLRef;
  36 #if PLATFORM(JAVA)
  37 typedef const struct __CFString* CFString;
  38 #endif
  39 #endif
  40 
  41 #if USE(SOUP)
  42 #include "GUniquePtrSoup.h"
  43 #endif
  44 
  45 #if USE(FOUNDATION)
  46 OBJC_CLASS NSURL;
  47 #endif
  48 
  49 namespace WebCore {
  50 
  51 class TextEncoding;
  52 struct URLHash;
  53 
  54 enum ParsedURLStringTag { ParsedURLString };
  55 
  56 class URL {
  57 public:
  58     // Generates a URL which contains a null string.
  59     URL() { invalidate(); }
  60 
  61     // The argument is an absolute URL string. The string is assumed to be output of URL::string() called on a valid
  62     // URL object, or indiscernible from such.
  63     // It is usually best to avoid repeatedly parsing a string, unless memory saving outweigh the possible slow-downs.
  64     URL(ParsedURLStringTag, const String&);
  65     explicit URL(WTF::HashTableDeletedValueType) : m_string(WTF::HashTableDeletedValue) { }
  66     bool isHashTableDeletedValue() const { return string().isHashTableDeletedValue(); }
  67 
  68     // Resolves the relative URL with the given base URL. If provided, the
  69     // TextEncoding is used to encode non-ASCII characers. The base URL can be
  70     // null or empty, in which case the relative URL will be interpreted as
  71     // absolute.
  72     // FIXME: If the base URL is invalid, this always creates an invalid
  73     // URL. Instead I think it would be better to treat all invalid base URLs
  74     // the same way we treate null and empty base URLs.
  75     URL(const URL& base, const String& relative);
  76     URL(const URL& base, const String& relative, const TextEncoding&);
  77 
  78     String strippedForUseAsReferrer() const;
  79 
  80     // FIXME: The above functions should be harmonized so that passing a
  81     // base of null or the empty string gives the same result as the
  82     // standard String constructor.
  83 
  84     // Makes a deep copy. Helpful only if you need to use a URL on another
  85     // thread.  Since the underlying StringImpl objects are immutable, there's
  86     // no other reason to ever prefer copy() over plain old assignment.
  87     URL copy() const;
  88 
  89     bool isNull() const;
  90     bool isEmpty() const;
  91     bool isValid() const;
  92 
  93     // Returns true if you can set the host and port for the URL.
  94     // Non-hierarchical URLs don't have a host and port.
  95     bool canSetHostOrPort() const { return isHierarchical(); }
  96 
  97     bool canSetPathname() const { return isHierarchical(); }
  98     bool isHierarchical() const;
  99 
 100     const String& string() const { return m_string; }
 101 
 102     String stringCenterEllipsizedToLength(unsigned length = 1024) const;
 103 
 104     String protocol() const;
 105     String host() const;
 106     unsigned short port() const;
 107     bool hasPort() const;
 108     String user() const;
 109     String pass() const;
 110     String path() const;
 111     String lastPathComponent() const;
 112     String query() const;
 113     String fragmentIdentifier() const;
 114     bool hasFragmentIdentifier() const;
 115 
 116     String baseAsString() const;
 117 
 118     String fileSystemPath() const;
 119 
 120     // Returns true if the current URL's protocol is the same as the null-
 121     // terminated ASCII argument. The argument must be lower-case.
 122     bool protocolIs(const char*) const;
 123     bool protocolIsData() const { return protocolIs("data"); }
 124     bool protocolIsInHTTPFamily() const;
 125     bool isLocalFile() const;
 126     bool isBlankURL() const;
 127 
 128     bool setProtocol(const String&);
 129     void setHost(const String&);
 130 
 131     void removePort();
 132     void setPort(unsigned short);
 133 
 134     // Input is like "foo.com" or "foo.com:8000".
 135     void setHostAndPort(const String&);
 136 
 137     void setUser(const String&);
 138     void setPass(const String&);
 139 
 140     // If you pass an empty path for HTTP or HTTPS URLs, the resulting path
 141     // will be "/".
 142     void setPath(const String&);
 143 
 144     // The query may begin with a question mark, or, if not, one will be added
 145     // for you. Setting the query to the empty string will leave a "?" in the
 146     // URL (with nothing after it). To clear the query, pass a null string.
 147     void setQuery(const String&);
 148 
 149     void setFragmentIdentifier(const String&);
 150     void removeFragmentIdentifier();
 151 
 152     friend bool equalIgnoringFragmentIdentifier(const URL&, const URL&);
 153 
 154     friend bool protocolHostAndPortAreEqual(const URL&, const URL&);
 155 
 156     unsigned hostStart() const;
 157     unsigned hostEnd() const;
 158 
 159     unsigned pathStart() const;
 160     unsigned pathEnd() const;
 161     unsigned pathAfterLastSlash() const;
 162 
 163     operator const String&() const { return string(); }
 164 
 165 #if USE(CF)
 166     URL(CFURLRef);
 167     RetainPtr<CFURLRef> createCFURL() const;
 168 #endif
 169 
 170 #if USE(SOUP)
 171     URL(SoupURI*);
 172     GUniquePtr<SoupURI> createSoupURI() const;
 173 #endif
 174 
 175 #if USE(FOUNDATION)
 176     URL(NSURL*);
 177     operator NSURL*() const;
 178 #endif
 179 #ifdef __OBJC__
 180     operator NSString*() const { return string(); }
 181 #endif
 182 
 183 #if PLATFORM(JAVA)
 184     String deprecatedString() const;
 185     bool isJarFile() const { return m_protocolIsInJar; }
 186 #endif
 187     const URL* innerURL() const { return 0; }
 188 
 189 #ifndef NDEBUG
 190     void print() const;
 191 #endif
 192 
 193     bool isSafeToSendToAnotherThread() const;
 194 
 195 private:
 196     void invalidate();
 197     static bool protocolIs(const String&, const char*);
 198     void init(const URL&, const String&, const TextEncoding&);
 199     void copyToBuffer(Vector<char, 512>& buffer) const;
 200 
 201     // Parses the given URL. The originalString parameter allows for an
 202     // optimization: When the source is the same as the fixed-up string,
 203     // it will use the passed-in string instead of allocating a new one.
 204     void parse(const String&);
 205     void parse(const char* url, const String* originalString = 0);
 206 
 207     bool hasPath() const;
 208 
 209     String m_string;
 210     bool m_isValid : 1;
 211     bool m_protocolIsInHTTPFamily : 1;
 212 #if PLATFORM(JAVA)
 213     bool m_protocolIsInJar : 1;
 214 #endif
 215 
 216     int m_schemeEnd;
 217     int m_userStart;
 218     int m_userEnd;
 219     int m_passwordEnd;
 220     int m_hostEnd;
 221     int m_portEnd;
 222     int m_pathAfterLastSlash;
 223     int m_pathEnd;
 224     int m_queryEnd;
 225     int m_fragmentEnd;
 226 };
 227 
 228 bool operator==(const URL&, const URL&);
 229 bool operator==(const URL&, const String&);
 230 bool operator==(const String&, const URL&);
 231 bool operator!=(const URL&, const URL&);
 232 bool operator!=(const URL&, const String&);
 233 bool operator!=(const String&, const URL&);
 234 
 235 bool equalIgnoringFragmentIdentifier(const URL&, const URL&);
 236 bool protocolHostAndPortAreEqual(const URL&, const URL&);
 237 
 238 const URL& blankURL();
 239 
 240 // Functions to do URL operations on strings.
 241 // These are operations that aren't faster on a parsed URL.
 242 // These are also different from the URL functions in that they don't require the string to be a valid and parsable URL.
 243 // This is especially important because valid javascript URLs are not necessarily considered valid by URL.
 244 
 245 bool protocolIs(const String& url, const char* protocol);
 246 bool protocolIsJavaScript(const String& url);
 247 bool protocolIsInHTTPFamily(const String& url);
 248 
 249 bool isDefaultPortForProtocol(unsigned short port, const String& protocol);
 250 bool portAllowed(const URL&); // Blacklist ports that should never be used for Web resources.
 251 
 252 bool isValidProtocol(const String&);
 253 
 254 String mimeTypeFromDataURL(const String& url);
 255 String mimeTypeFromURL(const URL&);
 256 
 257 // Unescapes the given string using URL escaping rules, given an optional
 258 // encoding (defaulting to UTF-8 otherwise). DANGER: If the URL has "%00"
 259 // in it, the resulting string will have embedded null characters!
 260 String decodeURLEscapeSequences(const String&);
 261 String decodeURLEscapeSequences(const String&, const TextEncoding&);
 262 
 263 String encodeWithURLEscapeSequences(const String&);
 264 
 265 // Inlines.
 266 
 267 inline bool operator==(const URL& a, const URL& b)
 268 {
 269     return a.string() == b.string();
 270 }
 271 
 272 inline bool operator==(const URL& a, const String& b)
 273 {
 274     return a.string() == b;
 275 }
 276 
 277 inline bool operator==(const String& a, const URL& b)
 278 {
 279     return a == b.string();
 280 }
 281 
 282 inline bool operator!=(const URL& a, const URL& b)
 283 {
 284     return a.string() != b.string();
 285 }
 286 
 287 inline bool operator!=(const URL& a, const String& b)
 288 {
 289     return a.string() != b;
 290 }
 291 
 292 inline bool operator!=(const String& a, const URL& b)
 293 {
 294     return a != b.string();
 295 }
 296 
 297 // Inline versions of some non-GoogleURL functions so we can get inlining
 298 // without having to have a lot of ugly ifdefs in the class definition.
 299 
 300 inline bool URL::isNull() const
 301 {
 302     return m_string.isNull();
 303 }
 304 
 305 inline bool URL::isEmpty() const
 306 {
 307     return m_string.isEmpty();
 308 }
 309 
 310 inline bool URL::isValid() const
 311 {
 312     return m_isValid;
 313 }
 314 
 315 inline bool URL::hasPath() const
 316 {
 317     return m_pathEnd != m_portEnd;
 318 }
 319 
 320 inline bool URL::hasPort() const
 321 {
 322     return m_hostEnd < m_portEnd;
 323 }
 324 
 325 inline bool URL::protocolIsInHTTPFamily() const
 326 {
 327     return m_protocolIsInHTTPFamily;
 328 }
 329 
 330 inline unsigned URL::hostStart() const
 331 {
 332     return (m_passwordEnd == m_userStart) ? m_passwordEnd : m_passwordEnd + 1;
 333 }
 334 
 335 inline unsigned URL::hostEnd() const
 336 {
 337     return m_hostEnd;
 338 }
 339 
 340 inline unsigned URL::pathStart() const
 341 {
 342     return m_portEnd;
 343 }
 344 
 345 inline unsigned URL::pathEnd() const
 346 {
 347     return m_pathEnd;
 348 }
 349 
 350 inline unsigned URL::pathAfterLastSlash() const
 351 {
 352     return m_pathAfterLastSlash;
 353 }
 354 
 355 #if PLATFORM(IOS)
 356 void enableURLSchemeCanonicalization(bool);
 357 #endif
 358 
 359 } // namespace WebCore
 360 
 361 namespace WTF {
 362 
    // URLHash is the default hash for WebCore::URL.
 364     template<typename T> struct DefaultHash;
 365     template<> struct DefaultHash<WebCore::URL> {
 366         typedef WebCore::URLHash Hash;
 367     };
 368 
 369 } // namespace WTF
 370 
 371 #endif // URL_h