1 /*
   2  * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2011, 2012, 2013 Apple Inc. All rights reserved.
   3  *
   4  * Redistribution and use in source and binary forms, with or without
   5  * modification, are permitted provided that the following conditions
   6  * are met:
   7  * 1. Redistributions of source code must retain the above copyright
   8  *    notice, this list of conditions and the following disclaimer.
   9  * 2. Redistributions in binary form must reproduce the above copyright
  10  *    notice, this list of conditions and the following disclaimer in the
  11  *    documentation and/or other materials provided with the distribution.
  12  *
  13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
  14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
  17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  24  */
  25 
  26 #pragma once
  27 
  28 #include "PlatformExportMacros.h"
  29 #include <wtf/Forward.h>
  30 #include <wtf/RetainPtr.h>
  31 #include <wtf/text/WTFString.h>
  32 
  33 #if USE(CF)
  34 typedef const struct __CFURL* CFURLRef;
  35 #if PLATFORM(JAVA)
  36 #include <wtf/java/JavaEnv.h>
  37 typedef const struct __CFString* CFString;
  38 #endif
  39 #endif
  40 
  41 #if USE(SOUP)
  42 #include "GUniquePtrSoup.h"
  43 #endif
  44 
  45 #if USE(FOUNDATION)
  46 OBJC_CLASS NSURL;
  47 #endif
  48 
  49 namespace WebCore {
  50 
  51 class TextEncoding;
  52 struct URLHash;
  53 
  54 enum ParsedURLStringTag { ParsedURLString };
  55 
  56 class URL {
  57 public:
  58     // Generates a URL which contains a null string.
  59     URL() { invalidate(); }
  60 
  61     // The argument is an absolute URL string. The string is assumed to be output of URL::string() called on a valid
  62     // URL object, or indiscernible from such.
  63     // It is usually best to avoid repeatedly parsing a string, unless memory saving outweigh the possible slow-downs.
  64     WEBCORE_EXPORT URL(ParsedURLStringTag, const String&);
  65     explicit URL(WTF::HashTableDeletedValueType) : m_string(WTF::HashTableDeletedValue) { }
  66     bool isHashTableDeletedValue() const { return string().isHashTableDeletedValue(); }
  67 
  68     // Resolves the relative URL with the given base URL. If provided, the
  69     // TextEncoding is used to encode non-ASCII characers. The base URL can be
  70     // null or empty, in which case the relative URL will be interpreted as
  71     // absolute.
  72     // FIXME: If the base URL is invalid, this always creates an invalid
  73     // URL. Instead I think it would be better to treat all invalid base URLs
  74     // the same way we treate null and empty base URLs.
  75     WEBCORE_EXPORT URL(const URL& base, const String& relative);
  76     URL(const URL& base, const String& relative, const TextEncoding&);
  77 
  78     static URL fakeURLWithRelativePart(const String&);
  79     static URL fileURLWithFileSystemPath(const String&);
  80 
  81     String strippedForUseAsReferrer() const;
  82 
  83     // FIXME: The above functions should be harmonized so that passing a
  84     // base of null or the empty string gives the same result as the
  85     // standard String constructor.
  86 
  87     // Makes a deep copy. Helpful only if you need to use a URL on another
  88     // thread. Since the underlying StringImpl objects are immutable, there's
  89     // no other reason to ever prefer isolatedCopy() over plain old assignment.
  90     WEBCORE_EXPORT URL isolatedCopy() const;
  91 
  92     bool isNull() const;
  93     bool isEmpty() const;
  94     bool isValid() const;
  95 
  96     // Returns true if you can set the host and port for the URL.
  97     // Non-hierarchical URLs don't have a host and port.
  98     bool canSetHostOrPort() const { return isHierarchical(); }
  99 
 100     bool canSetPathname() const { return isHierarchical(); }
 101     bool isHierarchical() const;
 102 
 103     const String& string() const { return m_string; }
 104 
 105     WEBCORE_EXPORT String stringCenterEllipsizedToLength(unsigned length = 1024) const;
 106 
 107     WEBCORE_EXPORT StringView protocol() const;
 108     WEBCORE_EXPORT String host() const;
 109     WEBCORE_EXPORT std::optional<uint16_t> port() const;
 110     WEBCORE_EXPORT String hostAndPort() const;
 111     WEBCORE_EXPORT String protocolHostAndPort() const;
 112     WEBCORE_EXPORT String user() const;
 113     WEBCORE_EXPORT String pass() const;
 114     WEBCORE_EXPORT String path() const;
 115     WEBCORE_EXPORT String lastPathComponent() const;
 116     WEBCORE_EXPORT String query() const;
 117     WEBCORE_EXPORT String fragmentIdentifier() const;
 118     WEBCORE_EXPORT bool hasFragmentIdentifier() const;
 119 
 120     bool hasUsername() const;
 121     bool hasPassword() const;
 122     bool hasQuery() const;
 123     bool hasFragment() const;
 124 
 125     // Unlike user() and pass(), these functions don't decode escape sequences.
 126     // This is necessary for accurate round-tripping, because encoding doesn't encode '%' characters.
 127     String encodedUser() const;
 128     String encodedPass() const;
 129 
 130     WEBCORE_EXPORT String baseAsString() const;
 131 
 132     WEBCORE_EXPORT String fileSystemPath() const;
 133 
 134     // Returns true if the current URL's protocol is the same as the null-
 135     // terminated ASCII argument. The argument must be lower-case.
 136     WEBCORE_EXPORT bool protocolIs(const char*) const;
 137     bool protocolIs(StringView) const;
 138     bool protocolIsBlob() const { return protocolIs("blob"); }
 139     bool protocolIsData() const { return protocolIs("data"); }
 140     bool protocolIsInHTTPFamily() const;
 141     WEBCORE_EXPORT bool isLocalFile() const;
 142     bool isBlankURL() const;
 143     bool cannotBeABaseURL() const { return m_cannotBeABaseURL; }
 144 
 145     WEBCORE_EXPORT bool setProtocol(const String&);
 146     void setHost(const String&);
 147 
 148     void removePort();
 149     void setPort(unsigned short);
 150 
 151     // Input is like "foo.com" or "foo.com:8000".
 152     void setHostAndPort(const String&);
 153 
 154     void setUser(const String&);
 155     void setPass(const String&);
 156 
 157     // If you pass an empty path for HTTP or HTTPS URLs, the resulting path
 158     // will be "/".
 159     WEBCORE_EXPORT void setPath(const String&);
 160 
 161     // The query may begin with a question mark, or, if not, one will be added
 162     // for you. Setting the query to the empty string will leave a "?" in the
 163     // URL (with nothing after it). To clear the query, pass a null string.
 164     void setQuery(const String&);
 165 
 166     void setFragmentIdentifier(StringView);
 167     void removeFragmentIdentifier();
 168 
 169     WEBCORE_EXPORT friend bool equalIgnoringFragmentIdentifier(const URL&, const URL&);
 170 
 171     WEBCORE_EXPORT friend bool protocolHostAndPortAreEqual(const URL&, const URL&);
 172 
 173     unsigned hostStart() const;
 174     unsigned hostEnd() const;
 175 
 176     unsigned pathStart() const;
 177     unsigned pathEnd() const;
 178     unsigned pathAfterLastSlash() const;
 179 
 180     operator const String&() const { return string(); }
 181 
 182 #if USE(CF)
 183     WEBCORE_EXPORT URL(CFURLRef);
 184     WEBCORE_EXPORT RetainPtr<CFURLRef> createCFURL() const;
 185 #endif
 186 
 187 #if USE(SOUP)
 188     URL(SoupURI*);
 189     GUniquePtr<SoupURI> createSoupURI() const;
 190 #endif
 191 
 192 #if USE(FOUNDATION)
 193     WEBCORE_EXPORT URL(NSURL*);
 194     WEBCORE_EXPORT operator NSURL*() const;
 195 #endif
 196 #ifdef __OBJC__
 197     operator NSString*() const { return string(); }
 198 #endif
 199 
 200 #if PLATFORM(JAVA)
 201     bool isJarFile() const { return m_protocolIsInJar; }
 202     URL(JNIEnv* env, jstring url) : URL(ParsedURLString, String(env, url)) {}
 203 #endif
 204 
 205 #ifndef NDEBUG
 206     void print() const;
 207 #endif
 208 
 209     template <class Encoder> void encode(Encoder&) const;
 210     template <class Decoder> static bool decode(Decoder&, URL&);
 211 
 212     String serialize(bool omitFragment = false) const;
 213 
 214 private:
 215     friend class URLParser;
 216     WEBCORE_EXPORT void invalidate();
 217     static bool protocolIs(const String&, const char*);
 218     void init(const URL&, const String&, const TextEncoding&);
 219     void copyToBuffer(Vector<char, 512>& buffer) const;
 220 
 221     // Parses the given URL. The originalString parameter allows for an
 222     // optimization: When the source is the same as the fixed-up string,
 223     // it will use the passed-in string instead of allocating a new one.
 224     void parse(const String&);
 225     void parse(const char* url, const String* originalString = 0);
 226 
 227     bool hasPath() const;
 228 
 229     String m_string;
 230     bool m_isValid : 1;
 231     bool m_protocolIsInHTTPFamily : 1;
 232 #if PLATFORM(JAVA)
 233     bool m_protocolIsInJar : 1;
 234 #endif
 235     bool m_cannotBeABaseURL : 1;
 236 
 237     unsigned m_schemeEnd;
 238     unsigned m_userStart;
 239     unsigned m_userEnd;
 240     unsigned m_passwordEnd;
 241     unsigned m_hostEnd;
 242     unsigned m_portEnd;
 243     unsigned m_pathAfterLastSlash;
 244     unsigned m_pathEnd;
 245     unsigned m_queryEnd;
 246     unsigned m_fragmentEnd;
 247 };
 248 
 249 template <class Encoder>
 250 void URL::encode(Encoder& encoder) const
 251 {
 252     encoder << m_string;
 253     encoder << static_cast<bool>(m_isValid);
 254     if (!m_isValid)
 255         return;
 256     encoder << static_cast<bool>(m_protocolIsInHTTPFamily);
 257     encoder << m_schemeEnd;
 258     encoder << m_userStart;
 259     encoder << m_userEnd;
 260     encoder << m_passwordEnd;
 261     encoder << m_hostEnd;
 262     encoder << m_portEnd;
 263     encoder << m_pathAfterLastSlash;
 264     encoder << m_pathEnd;
 265     encoder << m_queryEnd;
 266     encoder << m_fragmentEnd;
 267 }
 268 
 269 template <class Decoder>
 270 bool URL::decode(Decoder& decoder, URL& url)
 271 {
 272     if (!decoder.decode(url.m_string))
 273         return false;
 274     bool isValid;
 275     if (!decoder.decode(isValid))
 276         return false;
 277     url.m_isValid = isValid;
 278     if (!isValid)
 279         return true;
 280     bool protocolIsInHTTPFamily;
 281     if (!decoder.decode(protocolIsInHTTPFamily))
 282         return false;
 283     url.m_protocolIsInHTTPFamily = protocolIsInHTTPFamily;
 284     if (!decoder.decode(url.m_schemeEnd))
 285         return false;
 286     if (!decoder.decode(url.m_userStart))
 287         return false;
 288     if (!decoder.decode(url.m_userEnd))
 289         return false;
 290     if (!decoder.decode(url.m_passwordEnd))
 291         return false;
 292     if (!decoder.decode(url.m_hostEnd))
 293         return false;
 294     if (!decoder.decode(url.m_portEnd))
 295         return false;
 296     if (!decoder.decode(url.m_pathAfterLastSlash))
 297         return false;
 298     if (!decoder.decode(url.m_pathEnd))
 299         return false;
 300     if (!decoder.decode(url.m_queryEnd))
 301         return false;
 302     if (!decoder.decode(url.m_fragmentEnd))
 303         return false;
 304     return true;
 305 }
 306 
 307 bool operator==(const URL&, const URL&);
 308 bool operator==(const URL&, const String&);
 309 bool operator==(const String&, const URL&);
 310 bool operator!=(const URL&, const URL&);
 311 bool operator!=(const URL&, const String&);
 312 bool operator!=(const String&, const URL&);
 313 
 314 WEBCORE_EXPORT bool equalIgnoringFragmentIdentifier(const URL&, const URL&);
 315 WEBCORE_EXPORT bool protocolHostAndPortAreEqual(const URL&, const URL&);
 316 WEBCORE_EXPORT bool hostsAreEqual(const URL&, const URL&);
 317 
 318 WEBCORE_EXPORT const URL& blankURL();
 319 
 320 // Functions to do URL operations on strings.
 321 // These are operations that aren't faster on a parsed URL.
 322 // These are also different from the URL functions in that they don't require the string to be a valid and parsable URL.
 323 // This is especially important because valid javascript URLs are not necessarily considered valid by URL.
 324 
 325 WEBCORE_EXPORT bool protocolIs(const String& url, const char* protocol);
 326 WEBCORE_EXPORT bool protocolIsJavaScript(const String& url);
 327 WEBCORE_EXPORT bool protocolIsInHTTPFamily(const String& url);
 328 
 329 std::optional<uint16_t> defaultPortForProtocol(StringView protocol);
 330 WEBCORE_EXPORT bool isDefaultPortForProtocol(uint16_t port, StringView protocol);
 331 WEBCORE_EXPORT bool portAllowed(const URL&); // Blacklist ports that should never be used for Web resources.
 332 
 333 WEBCORE_EXPORT void registerDefaultPortForProtocolForTesting(uint16_t port, const String& protocol);
 334 WEBCORE_EXPORT void clearDefaultPortForProtocolMapForTesting();
 335 
 336 bool isValidProtocol(const String&);
 337 
 338 String mimeTypeFromDataURL(const String& url);
 339 WEBCORE_EXPORT String mimeTypeFromURL(const URL&);
 340 
 341 // Unescapes the given string using URL escaping rules, given an optional
 342 // encoding (defaulting to UTF-8 otherwise). DANGER: If the URL has "%00"
 343 // in it, the resulting string will have embedded null characters!
 344 WEBCORE_EXPORT String decodeURLEscapeSequences(const String&);
 345 String decodeURLEscapeSequences(const String&, const TextEncoding&);
 346 
 347 // FIXME: This is a wrong concept to expose, different parts of a URL need different escaping per the URL Standard.
 348 WEBCORE_EXPORT String encodeWithURLEscapeSequences(const String&);
 349 
 350 #if PLATFORM(IOS)
 351 WEBCORE_EXPORT void enableURLSchemeCanonicalization(bool);
 352 #endif
 353 
 354 // Inlines.
 355 
 356 inline bool operator==(const URL& a, const URL& b)
 357 {
 358     return a.string() == b.string();
 359 }
 360 
 361 inline bool operator==(const URL& a, const String& b)
 362 {
 363     return a.string() == b;
 364 }
 365 
 366 inline bool operator==(const String& a, const URL& b)
 367 {
 368     return a == b.string();
 369 }
 370 
 371 inline bool operator!=(const URL& a, const URL& b)
 372 {
 373     return a.string() != b.string();
 374 }
 375 
 376 inline bool operator!=(const URL& a, const String& b)
 377 {
 378     return a.string() != b;
 379 }
 380 
 381 inline bool operator!=(const String& a, const URL& b)
 382 {
 383     return a != b.string();
 384 }
 385 
 386 // Inline versions of some non-GoogleURL functions so we can get inlining
 387 // without having to have a lot of ugly ifdefs in the class definition.
 388 
 389 inline bool URL::isNull() const
 390 {
 391     return m_string.isNull();
 392 }
 393 
 394 inline bool URL::isEmpty() const
 395 {
 396     return m_string.isEmpty();
 397 }
 398 
 399 inline bool URL::isValid() const
 400 {
 401     return m_isValid;
 402 }
 403 
 404 inline bool URL::hasPath() const
 405 {
 406     return m_pathEnd != m_portEnd;
 407 }
 408 
 409 inline bool URL::hasUsername() const
 410 {
 411     return m_userEnd > m_userStart;
 412 }
 413 
 414 inline bool URL::hasPassword() const
 415 {
 416     return m_passwordEnd > (m_userEnd + 1);
 417 }
 418 
 419 inline bool URL::hasQuery() const
 420 {
 421     return m_queryEnd > m_pathEnd;
 422 }
 423 
 424 inline bool URL::hasFragment() const
 425 {
 426     return m_fragmentEnd > m_queryEnd;
 427 }
 428 
 429 inline bool URL::protocolIsInHTTPFamily() const
 430 {
 431     return m_protocolIsInHTTPFamily;
 432 }
 433 
 434 inline unsigned URL::hostStart() const
 435 {
 436     return (m_passwordEnd == m_userStart) ? m_passwordEnd : m_passwordEnd + 1;
 437 }
 438 
 439 inline unsigned URL::hostEnd() const
 440 {
 441     return m_hostEnd;
 442 }
 443 
 444 inline unsigned URL::pathStart() const
 445 {
 446     return m_portEnd;
 447 }
 448 
 449 inline unsigned URL::pathEnd() const
 450 {
 451     return m_pathEnd;
 452 }
 453 
 454 inline unsigned URL::pathAfterLastSlash() const
 455 {
 456     return m_pathAfterLastSlash;
 457 }
 458 
 459 } // namespace WebCore
 460 
 461 namespace WTF {
 462 
 463     // URLHash is the default hash for String
 464     template<typename T> struct DefaultHash;
 465     template<> struct DefaultHash<WebCore::URL> {
 466         typedef WebCore::URLHash Hash;
 467     };
 468 
 469 } // namespace WTF