1 /* 2 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2011, 2012, 2013 Apple Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #pragma once 27 28 #include "PlatformExportMacros.h" 29 #include <wtf/Forward.h> 30 #include <wtf/RetainPtr.h> 31 #include <wtf/text/WTFString.h> 32 33 #if USE(CF) 34 typedef const struct __CFURL* CFURLRef; 35 #if PLATFORM(JAVA) 36 #include <wtf/java/JavaEnv.h> 37 typedef const struct __CFString* CFString; 38 #endif 39 #endif 40 41 #if USE(SOUP) 42 #include "GUniquePtrSoup.h" 43 #endif 44 45 #if USE(FOUNDATION) 46 OBJC_CLASS NSURL; 47 #endif 48 49 namespace WebCore { 50 51 class TextEncoding; 52 struct URLHash; 53 54 enum ParsedURLStringTag { ParsedURLString }; 55 56 class URL { 57 public: 58 // Generates a URL which contains a null string. 59 URL() { invalidate(); } 60 61 // The argument is an absolute URL string. The string is assumed to be output of URL::string() called on a valid 62 // URL object, or indiscernible from such. 63 // It is usually best to avoid repeatedly parsing a string, unless memory saving outweigh the possible slow-downs. 64 WEBCORE_EXPORT URL(ParsedURLStringTag, const String&); 65 explicit URL(WTF::HashTableDeletedValueType) : m_string(WTF::HashTableDeletedValue) { } 66 bool isHashTableDeletedValue() const { return string().isHashTableDeletedValue(); } 67 68 // Resolves the relative URL with the given base URL. If provided, the 69 // TextEncoding is used to encode non-ASCII characers. The base URL can be 70 // null or empty, in which case the relative URL will be interpreted as 71 // absolute. 72 // FIXME: If the base URL is invalid, this always creates an invalid 73 // URL. Instead I think it would be better to treat all invalid base URLs 74 // the same way we treate null and empty base URLs. 75 WEBCORE_EXPORT URL(const URL& base, const String& relative); 76 URL(const URL& base, const String& relative, const TextEncoding&); 77 78 static URL fakeURLWithRelativePart(const String&); 79 static URL fileURLWithFileSystemPath(const String&); 80 81 String strippedForUseAsReferrer() const; 82 83 // FIXME: The above functions should be harmonized so that passing a 84 // base of null or the empty string gives the same result as the 85 // standard String constructor. 86 87 // Makes a deep copy. Helpful only if you need to use a URL on another 88 // thread. Since the underlying StringImpl objects are immutable, there's 89 // no other reason to ever prefer isolatedCopy() over plain old assignment. 90 WEBCORE_EXPORT URL isolatedCopy() const; 91 92 bool isNull() const; 93 bool isEmpty() const; 94 bool isValid() const; 95 96 // Returns true if you can set the host and port for the URL. 97 // Non-hierarchical URLs don't have a host and port. 98 bool canSetHostOrPort() const { return isHierarchical(); } 99 100 bool canSetPathname() const { return isHierarchical(); } 101 bool isHierarchical() const; 102 103 const String& string() const { return m_string; } 104 105 WEBCORE_EXPORT String stringCenterEllipsizedToLength(unsigned length = 1024) const; 106 107 WEBCORE_EXPORT StringView protocol() const; 108 WEBCORE_EXPORT String host() const; 109 WEBCORE_EXPORT std::optional<uint16_t> port() const; 110 WEBCORE_EXPORT String hostAndPort() const; 111 WEBCORE_EXPORT String protocolHostAndPort() const; 112 WEBCORE_EXPORT String user() const; 113 WEBCORE_EXPORT String pass() const; 114 WEBCORE_EXPORT String path() const; 115 WEBCORE_EXPORT String lastPathComponent() const; 116 WEBCORE_EXPORT String query() const; 117 WEBCORE_EXPORT String fragmentIdentifier() const; 118 WEBCORE_EXPORT bool hasFragmentIdentifier() const; 119 120 bool hasUsername() const; 121 bool hasPassword() const; 122 bool hasQuery() const; 123 bool hasFragment() const; 124 125 // Unlike user() and pass(), these functions don't decode escape sequences. 126 // This is necessary for accurate round-tripping, because encoding doesn't encode '%' characters. 127 String encodedUser() const; 128 String encodedPass() const; 129 130 WEBCORE_EXPORT String baseAsString() const; 131 132 WEBCORE_EXPORT String fileSystemPath() const; 133 134 // Returns true if the current URL's protocol is the same as the null- 135 // terminated ASCII argument. The argument must be lower-case. 136 WEBCORE_EXPORT bool protocolIs(const char*) const; 137 bool protocolIs(StringView) const; 138 bool protocolIsBlob() const { return protocolIs("blob"); } 139 bool protocolIsData() const { return protocolIs("data"); } 140 bool protocolIsInHTTPFamily() const; 141 WEBCORE_EXPORT bool isLocalFile() const; 142 bool isBlankURL() const; 143 bool cannotBeABaseURL() const { return m_cannotBeABaseURL; } 144 145 WEBCORE_EXPORT bool setProtocol(const String&); 146 void setHost(const String&); 147 148 void removePort(); 149 void setPort(unsigned short); 150 151 // Input is like "foo.com" or "foo.com:8000". 152 void setHostAndPort(const String&); 153 154 void setUser(const String&); 155 void setPass(const String&); 156 157 // If you pass an empty path for HTTP or HTTPS URLs, the resulting path 158 // will be "/". 159 WEBCORE_EXPORT void setPath(const String&); 160 161 // The query may begin with a question mark, or, if not, one will be added 162 // for you. Setting the query to the empty string will leave a "?" in the 163 // URL (with nothing after it). To clear the query, pass a null string. 164 void setQuery(const String&); 165 166 void setFragmentIdentifier(StringView); 167 void removeFragmentIdentifier(); 168 169 WEBCORE_EXPORT friend bool equalIgnoringFragmentIdentifier(const URL&, const URL&); 170 171 WEBCORE_EXPORT friend bool protocolHostAndPortAreEqual(const URL&, const URL&); 172 173 unsigned hostStart() const; 174 unsigned hostEnd() const; 175 176 unsigned pathStart() const; 177 unsigned pathEnd() const; 178 unsigned pathAfterLastSlash() const; 179 180 operator const String&() const { return string(); } 181 182 #if USE(CF) 183 WEBCORE_EXPORT URL(CFURLRef); 184 WEBCORE_EXPORT RetainPtr<CFURLRef> createCFURL() const; 185 #endif 186 187 #if USE(SOUP) 188 URL(SoupURI*); 189 GUniquePtr<SoupURI> createSoupURI() const; 190 #endif 191 192 #if USE(FOUNDATION) 193 WEBCORE_EXPORT URL(NSURL*); 194 WEBCORE_EXPORT operator NSURL*() const; 195 #endif 196 #ifdef __OBJC__ 197 operator NSString*() const { return string(); } 198 #endif 199 200 #if PLATFORM(JAVA) 201 bool isJarFile() const { return m_protocolIsInJar; } 202 URL(JNIEnv* env, jstring url) : URL(URL(), String(env, url)) {} 203 #endif 204 205 #ifndef NDEBUG 206 void print() const; 207 #endif 208 209 template <class Encoder> void encode(Encoder&) const; 210 template <class Decoder> static bool decode(Decoder&, URL&); 211 212 String serialize(bool omitFragment = false) const; 213 214 private: 215 friend class URLParser; 216 WEBCORE_EXPORT void invalidate(); 217 static bool protocolIs(const String&, const char*); 218 void init(const URL&, const String&, const TextEncoding&); 219 void copyToBuffer(Vector<char, 512>& buffer) const; 220 221 // Parses the given URL. The originalString parameter allows for an 222 // optimization: When the source is the same as the fixed-up string, 223 // it will use the passed-in string instead of allocating a new one. 224 void parse(const String&); 225 void parse(const char* url, const String* originalString = 0); 226 227 bool hasPath() const; 228 229 String m_string; 230 bool m_isValid : 1; 231 bool m_protocolIsInHTTPFamily : 1; 232 #if PLATFORM(JAVA) 233 bool m_protocolIsInJar : 1; 234 #endif 235 bool m_cannotBeABaseURL : 1; 236 237 unsigned m_schemeEnd; 238 unsigned m_userStart; 239 unsigned m_userEnd; 240 unsigned m_passwordEnd; 241 unsigned m_hostEnd; 242 unsigned m_portEnd; 243 unsigned m_pathAfterLastSlash; 244 unsigned m_pathEnd; 245 unsigned m_queryEnd; 246 unsigned m_fragmentEnd; 247 }; 248 249 template <class Encoder> 250 void URL::encode(Encoder& encoder) const 251 { 252 encoder << m_string; 253 encoder << static_cast<bool>(m_isValid); 254 if (!m_isValid) 255 return; 256 encoder << static_cast<bool>(m_protocolIsInHTTPFamily); 257 encoder << m_schemeEnd; 258 encoder << m_userStart; 259 encoder << m_userEnd; 260 encoder << m_passwordEnd; 261 encoder << m_hostEnd; 262 encoder << m_portEnd; 263 encoder << m_pathAfterLastSlash; 264 encoder << m_pathEnd; 265 encoder << m_queryEnd; 266 encoder << m_fragmentEnd; 267 } 268 269 template <class Decoder> 270 bool URL::decode(Decoder& decoder, URL& url) 271 { 272 if (!decoder.decode(url.m_string)) 273 return false; 274 bool isValid; 275 if (!decoder.decode(isValid)) 276 return false; 277 url.m_isValid = isValid; 278 if (!isValid) 279 return true; 280 bool protocolIsInHTTPFamily; 281 if (!decoder.decode(protocolIsInHTTPFamily)) 282 return false; 283 url.m_protocolIsInHTTPFamily = protocolIsInHTTPFamily; 284 if (!decoder.decode(url.m_schemeEnd)) 285 return false; 286 if (!decoder.decode(url.m_userStart)) 287 return false; 288 if (!decoder.decode(url.m_userEnd)) 289 return false; 290 if (!decoder.decode(url.m_passwordEnd)) 291 return false; 292 if (!decoder.decode(url.m_hostEnd)) 293 return false; 294 if (!decoder.decode(url.m_portEnd)) 295 return false; 296 if (!decoder.decode(url.m_pathAfterLastSlash)) 297 return false; 298 if (!decoder.decode(url.m_pathEnd)) 299 return false; 300 if (!decoder.decode(url.m_queryEnd)) 301 return false; 302 if (!decoder.decode(url.m_fragmentEnd)) 303 return false; 304 return true; 305 } 306 307 bool operator==(const URL&, const URL&); 308 bool operator==(const URL&, const String&); 309 bool operator==(const String&, const URL&); 310 bool operator!=(const URL&, const URL&); 311 bool operator!=(const URL&, const String&); 312 bool operator!=(const String&, const URL&); 313 314 WEBCORE_EXPORT bool equalIgnoringFragmentIdentifier(const URL&, const URL&); 315 WEBCORE_EXPORT bool protocolHostAndPortAreEqual(const URL&, const URL&); 316 WEBCORE_EXPORT bool hostsAreEqual(const URL&, const URL&); 317 318 WEBCORE_EXPORT const URL& blankURL(); 319 320 // Functions to do URL operations on strings. 321 // These are operations that aren't faster on a parsed URL. 322 // These are also different from the URL functions in that they don't require the string to be a valid and parsable URL. 323 // This is especially important because valid javascript URLs are not necessarily considered valid by URL. 324 325 WEBCORE_EXPORT bool protocolIs(const String& url, const char* protocol); 326 WEBCORE_EXPORT bool protocolIsJavaScript(const String& url); 327 WEBCORE_EXPORT bool protocolIsInHTTPFamily(const String& url); 328 329 std::optional<uint16_t> defaultPortForProtocol(StringView protocol); 330 WEBCORE_EXPORT bool isDefaultPortForProtocol(uint16_t port, StringView protocol); 331 WEBCORE_EXPORT bool portAllowed(const URL&); // Blacklist ports that should never be used for Web resources. 332 333 WEBCORE_EXPORT void registerDefaultPortForProtocolForTesting(uint16_t port, const String& protocol); 334 WEBCORE_EXPORT void clearDefaultPortForProtocolMapForTesting(); 335 336 bool isValidProtocol(const String&); 337 338 String mimeTypeFromDataURL(const String& url); 339 WEBCORE_EXPORT String mimeTypeFromURL(const URL&); 340 341 // Unescapes the given string using URL escaping rules, given an optional 342 // encoding (defaulting to UTF-8 otherwise). DANGER: If the URL has "%00" 343 // in it, the resulting string will have embedded null characters! 344 WEBCORE_EXPORT String decodeURLEscapeSequences(const String&); 345 String decodeURLEscapeSequences(const String&, const TextEncoding&); 346 347 // FIXME: This is a wrong concept to expose, different parts of a URL need different escaping per the URL Standard. 348 WEBCORE_EXPORT String encodeWithURLEscapeSequences(const String&); 349 350 #if PLATFORM(IOS) 351 WEBCORE_EXPORT void enableURLSchemeCanonicalization(bool); 352 #endif 353 354 // Inlines. 355 356 inline bool operator==(const URL& a, const URL& b) 357 { 358 return a.string() == b.string(); 359 } 360 361 inline bool operator==(const URL& a, const String& b) 362 { 363 return a.string() == b; 364 } 365 366 inline bool operator==(const String& a, const URL& b) 367 { 368 return a == b.string(); 369 } 370 371 inline bool operator!=(const URL& a, const URL& b) 372 { 373 return a.string() != b.string(); 374 } 375 376 inline bool operator!=(const URL& a, const String& b) 377 { 378 return a.string() != b; 379 } 380 381 inline bool operator!=(const String& a, const URL& b) 382 { 383 return a != b.string(); 384 } 385 386 // Inline versions of some non-GoogleURL functions so we can get inlining 387 // without having to have a lot of ugly ifdefs in the class definition. 388 389 inline bool URL::isNull() const 390 { 391 return m_string.isNull(); 392 } 393 394 inline bool URL::isEmpty() const 395 { 396 return m_string.isEmpty(); 397 } 398 399 inline bool URL::isValid() const 400 { 401 return m_isValid; 402 } 403 404 inline bool URL::hasPath() const 405 { 406 return m_pathEnd != m_portEnd; 407 } 408 409 inline bool URL::hasUsername() const 410 { 411 return m_userEnd > m_userStart; 412 } 413 414 inline bool URL::hasPassword() const 415 { 416 return m_passwordEnd > (m_userEnd + 1); 417 } 418 419 inline bool URL::hasQuery() const 420 { 421 return m_queryEnd > m_pathEnd; 422 } 423 424 inline bool URL::hasFragment() const 425 { 426 return m_fragmentEnd > m_queryEnd; 427 } 428 429 inline bool URL::protocolIsInHTTPFamily() const 430 { 431 return m_protocolIsInHTTPFamily; 432 } 433 434 inline unsigned URL::hostStart() const 435 { 436 return (m_passwordEnd == m_userStart) ? m_passwordEnd : m_passwordEnd + 1; 437 } 438 439 inline unsigned URL::hostEnd() const 440 { 441 return m_hostEnd; 442 } 443 444 inline unsigned URL::pathStart() const 445 { 446 return m_portEnd; 447 } 448 449 inline unsigned URL::pathEnd() const 450 { 451 return m_pathEnd; 452 } 453 454 inline unsigned URL::pathAfterLastSlash() const 455 { 456 return m_pathAfterLastSlash; 457 } 458 459 } // namespace WebCore 460 461 namespace WTF { 462 463 // URLHash is the default hash for String 464 template<typename T> struct DefaultHash; 465 template<> struct DefaultHash<WebCore::URL> { 466 typedef WebCore::URLHash Hash; 467 }; 468 469 } // namespace WTF