1 /*
   2  * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.net;
  27 
  28 import java.io.IOException;
  29 import java.io.InvalidObjectException;
  30 import java.io.ObjectInputStream;
  31 import java.io.ObjectOutputStream;
  32 import java.io.Serializable;
  33 import java.nio.ByteBuffer;
  34 import java.nio.CharBuffer;
  35 import java.nio.charset.CharsetDecoder;
  36 import java.nio.charset.CoderResult;
  37 import java.nio.charset.CodingErrorAction;
  38 import java.nio.charset.CharacterCodingException;
  39 import java.text.Normalizer;
  40 import jdk.internal.loader.URLClassPath;
  41 import jdk.internal.misc.JavaNetUriAccess;
  42 import jdk.internal.misc.SharedSecrets;
  43 import sun.nio.cs.ThreadLocalCoders;
  44 
  45 import java.lang.Character;             // for javadoc
  46 import java.lang.NullPointerException;  // for javadoc
  47 
  48 
  49 /**
  50  * Represents a Uniform Resource Identifier (URI) reference.
  51  *
  52  * <p> Aside from some minor deviations noted below, an instance of this
  53  * class represents a URI reference as defined by
  54  * <a href="http://www.ietf.org/rfc/rfc2396.txt"><i>RFC&nbsp;2396: Uniform
  55  * Resource Identifiers (URI): Generic Syntax</i></a>, amended by <a
  56  * href="http://www.ietf.org/rfc/rfc2732.txt"><i>RFC&nbsp;2732: Format for
  57  * Literal IPv6 Addresses in URLs</i></a>. The Literal IPv6 address format
  58  * also supports scope_ids. The syntax and usage of scope_ids is described
  59  * <a href="Inet6Address.html#scoped">here</a>.
  60  * This class provides constructors for creating URI instances from
  61  * their components or by parsing their string forms, methods for accessing the
  62  * various components of an instance, and methods for normalizing, resolving,
  63  * and relativizing URI instances.  Instances of this class are immutable.
  64  *
  65  *
  66  * <h3> URI syntax and components </h3>
  67  *
  68  * At the highest level a URI reference (hereinafter simply "URI") in string
  69  * form has the syntax
  70  *
  71  * <blockquote>
  72  * [<i>scheme</i><b>{@code :}</b>]<i>scheme-specific-part</i>[<b>{@code #}</b><i>fragment</i>]
  73  * </blockquote>
  74  *
  75  * where square brackets [...] delineate optional components and the characters
  76  * <b>{@code :}</b> and <b>{@code #}</b> stand for themselves.
  77  *
  78  * <p> An <i>absolute</i> URI specifies a scheme; a URI that is not absolute is
  79  * said to be <i>relative</i>.  URIs are also classified according to whether
  80  * they are <i>opaque</i> or <i>hierarchical</i>.
  81  *
  82  * <p> An <i>opaque</i> URI is an absolute URI whose scheme-specific part does
  83  * not begin with a slash character ({@code '/'}).  Opaque URIs are not
  84  * subject to further parsing.  Some examples of opaque URIs are:
  85  *
  86  * <blockquote><ul style="list-style-type:none">
  87  * <li>{@code mailto:java-net@java.sun.com}</li>
  88  * <li>{@code news:comp.lang.java}</li>
  89  * <li>{@code urn:isbn:096139210x}</li>
  90  * </ul></blockquote>
  91  *
  92  * <p> A <i>hierarchical</i> URI is either an absolute URI whose
  93  * scheme-specific part begins with a slash character, or a relative URI, that
  94  * is, a URI that does not specify a scheme.  Some examples of hierarchical
  95  * URIs are:
  96  *
  97  * <blockquote>
  98  * {@code http://example.com/languages/java/}<br>
  99  * {@code sample/a/index.html#28}<br>
 100  * {@code ../../demo/b/index.html}<br>
 101  * {@code file:///~/calendar}
 102  * </blockquote>
 103  *
 104  * <p> A hierarchical URI is subject to further parsing according to the syntax
 105  *
 106  * <blockquote>
 107  * [<i>scheme</i><b>{@code :}</b>][<b>{@code //}</b><i>authority</i>][<i>path</i>][<b>{@code ?}</b><i>query</i>][<b>{@code #}</b><i>fragment</i>]
 108  * </blockquote>
 109  *
 110  * where the characters <b>{@code :}</b>, <b>{@code /}</b>,
 111  * <b>{@code ?}</b>, and <b>{@code #}</b> stand for themselves.  The
 112  * scheme-specific part of a hierarchical URI consists of the characters
 113  * between the scheme and fragment components.
 114  *
 115  * <p> The authority component of a hierarchical URI is, if specified, either
 116  * <i>server-based</i> or <i>registry-based</i>.  A server-based authority
 117  * parses according to the familiar syntax
 118  *
 119  * <blockquote>
 120  * [<i>user-info</i><b>{@code @}</b>]<i>host</i>[<b>{@code :}</b><i>port</i>]
 121  * </blockquote>
 122  *
 123  * where the characters <b>{@code @}</b> and <b>{@code :}</b> stand for
 124  * themselves.  Nearly all URI schemes currently in use are server-based.  An
 125  * authority component that does not parse in this way is considered to be
 126  * registry-based.
 127  *
 128  * <p> The path component of a hierarchical URI is itself said to be absolute
 129  * if it begins with a slash character ({@code '/'}); otherwise it is
 130  * relative.  The path of a hierarchical URI that is either absolute or
 131  * specifies an authority is always absolute.
 132  *
 133  * <p> All told, then, a URI instance has the following nine components:
 134  *
 135  * <blockquote><table>
 136  * <caption style="display:none">Describes the components of a URI: scheme, scheme-specific-part, authority, user-info, host, port, path, query, fragment</caption>
 137  * <thead>
 138  * <tr><th><i>Component</i></th><th><i>Type</i></th></tr>
 139  * </thead>
 140  * <tbody>
 141  * <tr><td>scheme</td><td>{@code String}</td></tr>
 142  * <tr><td>scheme-specific-part&nbsp;&nbsp;&nbsp;&nbsp;</td><td>{@code String}</td></tr>
 143  * <tr><td>authority</td><td>{@code String}</td></tr>
 144  * <tr><td>user-info</td><td>{@code String}</td></tr>
 145  * <tr><td>host</td><td>{@code String}</td></tr>
 146  * <tr><td>port</td><td>{@code int}</td></tr>
 147  * <tr><td>path</td><td>{@code String}</td></tr>
 148  * <tr><td>query</td><td>{@code String}</td></tr>
 149  * <tr><td>fragment</td><td>{@code String}</td></tr>
 150  * </tbody>
 151  * </table></blockquote>
 152  *
 153  * In a given instance any particular component is either <i>undefined</i> or
 154  * <i>defined</i> with a distinct value.  Undefined string components are
 155  * represented by {@code null}, while undefined integer components are
 156  * represented by {@code -1}.  A string component may be defined to have the
 157  * empty string as its value; this is not equivalent to that component being
 158  * undefined.
 159  *
 160  * <p> Whether a particular component is or is not defined in an instance
 161  * depends upon the type of the URI being represented.  An absolute URI has a
 162  * scheme component.  An opaque URI has a scheme, a scheme-specific part, and
 163  * possibly a fragment, but has no other components.  A hierarchical URI always
 164  * has a path (though it may be empty) and a scheme-specific-part (which at
 165  * least contains the path), and may have any of the other components.  If the
 166  * authority component is present and is server-based then the host component
 167  * will be defined and the user-information and port components may be defined.
 168  *
 169  *
 170  * <h4> Operations on URI instances </h4>
 171  *
 172  * The key operations supported by this class are those of
 173  * <i>normalization</i>, <i>resolution</i>, and <i>relativization</i>.
 174  *
 175  * <p> <i>Normalization</i> is the process of removing unnecessary {@code "."}
 176  * and {@code ".."} segments from the path component of a hierarchical URI.
 177  * Each {@code "."} segment is simply removed.  A {@code ".."} segment is
 178  * removed only if it is preceded by a non-{@code ".."} segment.
 179  * Normalization has no effect upon opaque URIs.
 180  *
 181  * <p> <i>Resolution</i> is the process of resolving one URI against another,
 182  * <i>base</i> URI.  The resulting URI is constructed from components of both
 183  * URIs in the manner specified by RFC&nbsp;2396, taking components from the
 184  * base URI for those not specified in the original.  For hierarchical URIs,
 185  * the path of the original is resolved against the path of the base and then
 186  * normalized.  The result, for example, of resolving
 187  *
 188  * <blockquote>
 189  * {@code sample/a/index.html#28}
 190  * &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
 191  * &nbsp;&nbsp;&nbsp;&nbsp;(1)
 192  * </blockquote>
 193  *
 194  * against the base URI {@code http://example.com/languages/java/} is the result
 195  * URI
 196  *
 197  * <blockquote>
 198  * {@code http://example.com/languages/java/sample/a/index.html#28}
 199  * </blockquote>
 200  *
 201  * Resolving the relative URI
 202  *
 203  * <blockquote>
 204  * {@code ../../demo/b/index.html}&nbsp;&nbsp;&nbsp;&nbsp;(2)
 205  * </blockquote>
 206  *
 207  * against this result yields, in turn,
 208  *
 209  * <blockquote>
 210  * {@code http://example.com/languages/java/demo/b/index.html}
 211  * </blockquote>
 212  *
 213  * Resolution of both absolute and relative URIs, and of both absolute and
 214  * relative paths in the case of hierarchical URIs, is supported.  Resolving
 215  * the URI {@code file:///~/calendar} against any other URI simply yields the
 216  * original URI, since it is absolute.  Resolving the relative URI (2) above
 217  * against the relative base URI (1) yields the normalized, but still relative,
 218  * URI
 219  *
 220  * <blockquote>
 221  * {@code demo/b/index.html}
 222  * </blockquote>
 223  *
 224  * <p> <i>Relativization</i>, finally, is the inverse of resolution: For any
 225  * two normalized URIs <i>u</i> and&nbsp;<i>v</i>,
 226  *
 227  * <blockquote>
 228  *   <i>u</i>{@code .relativize(}<i>u</i>{@code .resolve(}<i>v</i>{@code )).equals(}<i>v</i>{@code )}&nbsp;&nbsp;and<br>
 229  *   <i>u</i>{@code .resolve(}<i>u</i>{@code .relativize(}<i>v</i>{@code )).equals(}<i>v</i>{@code )}&nbsp;&nbsp;.<br>
 230  * </blockquote>
 231  *
 232  * This operation is often useful when constructing a document containing URIs
 233  * that must be made relative to the base URI of the document wherever
 234  * possible.  For example, relativizing the URI
 235  *
 236  * <blockquote>
 237  * {@code http://example.com/languages/java/sample/a/index.html#28}
 238  * </blockquote>
 239  *
 240  * against the base URI
 241  *
 242  * <blockquote>
 243  * {@code http://example.com/languages/java/}
 244  * </blockquote>
 245  *
 246  * yields the relative URI {@code sample/a/index.html#28}.
 247  *
 248  *
 249  * <h4> Character categories </h4>
 250  *
 251  * RFC&nbsp;2396 specifies precisely which characters are permitted in the
 252  * various components of a URI reference.  The following categories, most of
 253  * which are taken from that specification, are used below to describe these
 254  * constraints:
 255  *
 256  * <blockquote><table>
 257  * <caption style="display:none">Describes categories alpha, digit, alphanum, unreserved, punct, reserved, escaped, and other</caption>
 258  *   <tbody>
 259  *   <tr><th valign=top><i>alpha</i></th>
 260  *       <td>The US-ASCII alphabetic characters,
 261  *        {@code 'A'}&nbsp;through&nbsp;{@code 'Z'}
 262  *        and {@code 'a'}&nbsp;through&nbsp;{@code 'z'}</td></tr>
 263  *   <tr><th valign=top><i>digit</i></th>
 264  *       <td>The US-ASCII decimal digit characters,
 265  *       {@code '0'}&nbsp;through&nbsp;{@code '9'}</td></tr>
 266  *   <tr><th valign=top><i>alphanum</i></th>
 267  *       <td>All <i>alpha</i> and <i>digit</i> characters</td></tr>
 268  *   <tr><th valign=top><i>unreserved</i>&nbsp;&nbsp;&nbsp;&nbsp;</th>
 269  *       <td>All <i>alphanum</i> characters together with those in the string
 270  *        {@code "_-!.~'()*"}</td></tr>
 271  *   <tr><th valign=top><i>punct</i></th>
 272  *       <td>The characters in the string {@code ",;:$&+="}</td></tr>
 273  *   <tr><th valign=top><i>reserved</i></th>
 274  *       <td>All <i>punct</i> characters together with those in the string
 275  *        {@code "?/[]@"}</td></tr>
 276  *   <tr><th valign=top><i>escaped</i></th>
 277  *       <td>Escaped octets, that is, triplets consisting of the percent
 278  *           character ({@code '%'}) followed by two hexadecimal digits
 279  *           ({@code '0'}-{@code '9'}, {@code 'A'}-{@code 'F'}, and
 280  *           {@code 'a'}-{@code 'f'})</td></tr>
 281  *   <tr><th valign=top><i>other</i></th>
 282  *       <td>The Unicode characters that are not in the US-ASCII character set,
 283  *           are not control characters (according to the {@link
 284  *           java.lang.Character#isISOControl(char) Character.isISOControl}
 285  *           method), and are not space characters (according to the {@link
 286  *           java.lang.Character#isSpaceChar(char) Character.isSpaceChar}
 287  *           method)&nbsp;&nbsp;<i>(<b>Deviation from RFC 2396</b>, which is
 288  *           limited to US-ASCII)</i></td></tr>
 289  * </tbody>
 290  * </table></blockquote>
 291  *
 292  * <p><a id="legal-chars"></a> The set of all legal URI characters consists of
 293  * the <i>unreserved</i>, <i>reserved</i>, <i>escaped</i>, and <i>other</i>
 294  * characters.
 295  *
 296  *
 297  * <h4> Escaped octets, quotation, encoding, and decoding </h4>
 298  *
 299  * RFC 2396 allows escaped octets to appear in the user-info, path, query, and
 300  * fragment components.  Escaping serves two purposes in URIs:
 301  *
 302  * <ul>
 303  *
 304  *   <li><p> To <i>encode</i> non-US-ASCII characters when a URI is required to
 305  *   conform strictly to RFC&nbsp;2396 by not containing any <i>other</i>
 306  *   characters.  </p></li>
 307  *
 308  *   <li><p> To <i>quote</i> characters that are otherwise illegal in a
 309  *   component.  The user-info, path, query, and fragment components differ
 310  *   slightly in terms of which characters are considered legal and illegal.
 311  *   </p></li>
 312  *
 313  * </ul>
 314  *
 315  * These purposes are served in this class by three related operations:
 316  *
 317  * <ul>
 318  *
 319  *   <li><p><a id="encode"></a> A character is <i>encoded</i> by replacing it
 320  *   with the sequence of escaped octets that represent that character in the
 321  *   UTF-8 character set.  The Euro currency symbol ({@code '\u005Cu20AC'}),
 322  *   for example, is encoded as {@code "%E2%82%AC"}.  <i>(<b>Deviation from
 323  *   RFC&nbsp;2396</b>, which does not specify any particular character
 324  *   set.)</i> </p></li>
 325  *
 326  *   <li><p><a id="quote"></a> An illegal character is <i>quoted</i> simply by
 327  *   encoding it.  The space character, for example, is quoted by replacing it
 328  *   with {@code "%20"}.  UTF-8 contains US-ASCII, hence for US-ASCII
 329  *   characters this transformation has exactly the effect required by
 330  *   RFC&nbsp;2396. </p></li>
 331  *
 332  *   <li><p><a id="decode"></a>
 333  *   A sequence of escaped octets is <i>decoded</i> by
 334  *   replacing it with the sequence of characters that it represents in the
 335  *   UTF-8 character set.  UTF-8 contains US-ASCII, hence decoding has the
 336  *   effect of de-quoting any quoted US-ASCII characters as well as that of
 337  *   decoding any encoded non-US-ASCII characters.  If a <a
 338  *   href="../nio/charset/CharsetDecoder.html#ce">decoding error</a> occurs
 339  *   when decoding the escaped octets then the erroneous octets are replaced by
 340  *   {@code '\u005CuFFFD'}, the Unicode replacement character.  </p></li>
 341  *
 342  * </ul>
 343  *
 344  * These operations are exposed in the constructors and methods of this class
 345  * as follows:
 346  *
 347  * <ul>
 348  *
 349  *   <li><p> The {@linkplain #URI(java.lang.String) single-argument
 350  *   constructor} requires any illegal characters in its argument to be
 351  *   quoted and preserves any escaped octets and <i>other</i> characters that
 352  *   are present.  </p></li>
 353  *
 354  *   <li><p> The {@linkplain
 355  *   #URI(java.lang.String,java.lang.String,java.lang.String,int,java.lang.String,java.lang.String,java.lang.String)
 356  *   multi-argument constructors} quote illegal characters as
 357  *   required by the components in which they appear.  The percent character
 358  *   ({@code '%'}) is always quoted by these constructors.  Any <i>other</i>
 359  *   characters are preserved.  </p></li>
 360  *
 361  *   <li><p> The {@link #getRawUserInfo() getRawUserInfo}, {@link #getRawPath()
 362  *   getRawPath}, {@link #getRawQuery() getRawQuery}, {@link #getRawFragment()
 363  *   getRawFragment}, {@link #getRawAuthority() getRawAuthority}, and {@link
 364  *   #getRawSchemeSpecificPart() getRawSchemeSpecificPart} methods return the
 365  *   values of their corresponding components in raw form, without interpreting
 366  *   any escaped octets.  The strings returned by these methods may contain
 367  *   both escaped octets and <i>other</i> characters, and will not contain any
 368  *   illegal characters.  </p></li>
 369  *
 370  *   <li><p> The {@link #getUserInfo() getUserInfo}, {@link #getPath()
 371  *   getPath}, {@link #getQuery() getQuery}, {@link #getFragment()
 372  *   getFragment}, {@link #getAuthority() getAuthority}, and {@link
 373  *   #getSchemeSpecificPart() getSchemeSpecificPart} methods decode any escaped
 374  *   octets in their corresponding components.  The strings returned by these
 375  *   methods may contain both <i>other</i> characters and illegal characters,
 376  *   and will not contain any escaped octets.  </p></li>
 377  *
 378  *   <li><p> The {@link #toString() toString} method returns a URI string with
 379  *   all necessary quotation but which may contain <i>other</i> characters.
 380  *   </p></li>
 381  *
 382  *   <li><p> The {@link #toASCIIString() toASCIIString} method returns a fully
 383  *   quoted and encoded URI string that does not contain any <i>other</i>
 384  *   characters.  </p></li>
 385  *
 386  * </ul>
 387  *
 388  *
 389  * <h4> Identities </h4>
 390  *
 391  * For any URI <i>u</i>, it is always the case that
 392  *
 393  * <blockquote>
 394  * {@code new URI(}<i>u</i>{@code .toString()).equals(}<i>u</i>{@code )}&nbsp;.
 395  * </blockquote>
 396  *
 397  * For any URI <i>u</i> that does not contain redundant syntax such as two
 398  * slashes before an empty authority (as in {@code file:///tmp/}&nbsp;) or a
 399  * colon following a host name but no port (as in
 400  * {@code http://java.sun.com:}&nbsp;), and that does not encode characters
 401  * except those that must be quoted, the following identities also hold:
 402  * <pre>
 403  *     new URI(<i>u</i>.getScheme(),
 404  *             <i>u</i>.getSchemeSpecificPart(),
 405  *             <i>u</i>.getFragment())
 406  *     .equals(<i>u</i>)</pre>
 407  * in all cases,
 408  * <pre>
 409  *     new URI(<i>u</i>.getScheme(),
 410  *             <i>u</i>.getAuthority(),
 411  *             <i>u</i>.getPath(), <i>u</i>.getQuery(),
 412  *             <i>u</i>.getFragment())
 413  *     .equals(<i>u</i>)</pre>
 414  * if <i>u</i> is hierarchical, and
 415  * <pre>
 416  *     new URI(<i>u</i>.getScheme(),
 417  *             <i>u</i>.getUserInfo(), <i>u</i>.getHost(), <i>u</i>.getPort(),
 418  *             <i>u</i>.getPath(), <i>u</i>.getQuery(),
 419  *             <i>u</i>.getFragment())
 420  *     .equals(<i>u</i>)</pre>
 421  * if <i>u</i> is hierarchical and has either no authority or a server-based
 422  * authority.
 423  *
 424  *
 425  * <h4> URIs, URLs, and URNs </h4>
 426  *
 427  * A URI is a uniform resource <i>identifier</i> while a URL is a uniform
 428  * resource <i>locator</i>.  Hence every URL is a URI, abstractly speaking, but
 429  * not every URI is a URL.  This is because there is another subcategory of
 430  * URIs, uniform resource <i>names</i> (URNs), which name resources but do not
 431  * specify how to locate them.  The {@code mailto}, {@code news}, and
 432  * {@code isbn} URIs shown above are examples of URNs.
 433  *
 434  * <p> The conceptual distinction between URIs and URLs is reflected in the
 435  * differences between this class and the {@link URL} class.
 436  *
 437  * <p> An instance of this class represents a URI reference in the syntactic
 438  * sense defined by RFC&nbsp;2396.  A URI may be either absolute or relative.
 439  * A URI string is parsed according to the generic syntax without regard to the
 440  * scheme, if any, that it specifies.  No lookup of the host, if any, is
 441  * performed, and no scheme-dependent stream handler is constructed.  Equality,
 442  * hashing, and comparison are defined strictly in terms of the character
 443  * content of the instance.  In other words, a URI instance is little more than
 444  * a structured string that supports the syntactic, scheme-independent
 445  * operations of comparison, normalization, resolution, and relativization.
 446  *
 447  * <p> An instance of the {@link URL} class, by contrast, represents the
 448  * syntactic components of a URL together with some of the information required
 449  * to access the resource that it describes.  A URL must be absolute, that is,
 450  * it must always specify a scheme.  A URL string is parsed according to its
 451  * scheme.  A stream handler is always established for a URL, and in fact it is
 452  * impossible to create a URL instance for a scheme for which no handler is
 453  * available.  Equality and hashing depend upon both the scheme and the
 454  * Internet address of the host, if any; comparison is not defined.  In other
 455  * words, a URL is a structured string that supports the syntactic operation of
 456  * resolution as well as the network I/O operations of looking up the host and
 457  * opening a connection to the specified resource.
 458  *
 459  *
 460  * @author Mark Reinhold
 461  * @since 1.4
 462  *
 463  * @see <a href="http://www.ietf.org/rfc/rfc2279.txt"><i>RFC&nbsp;2279: UTF-8, a
 464  * transformation format of ISO 10646</i></a>, <br><a
 465  * href="http://www.ietf.org/rfc/rfc2373.txt"><i>RFC&nbsp;2373: IPv6 Addressing
 466  * Architecture</i></a>, <br><a
 467  * href="http://www.ietf.org/rfc/rfc2396.txt"><i>RFC&nbsp;2396: Uniform
 468  * Resource Identifiers (URI): Generic Syntax</i></a>, <br><a
 469  * href="http://www.ietf.org/rfc/rfc2732.txt"><i>RFC&nbsp;2732: Format for
 470  * Literal IPv6 Addresses in URLs</i></a>, <br><a
 471  * href="URISyntaxException.html">URISyntaxException</a>
 472  */
 473 
 474 public final class URI
 475     implements Comparable<URI>, Serializable
 476 {
 477 
 478     // Note: Comments containing the word "ASSERT" indicate places where a
 479     // throw of an InternalError should be replaced by an appropriate assertion
 480     // statement once asserts are enabled in the build.
 481 
 482     static final long serialVersionUID = -6052424284110960213L;  // URI implements Serializable
 483
 484
 485     // -- Properties and components of this instance --
         // Every component field below is declared transient; only the string
         // form at the end of this group is non-transient.  Undefined string
         // components are held as null and an undefined port as -1.
 486
 487     // Components of all URIs: [<scheme>:]<scheme-specific-part>[#<fragment>]
 488     private transient String scheme;            // null ==> relative URI
 489     private transient String fragment;          // null ==> undefined
 490
 491     // Hierarchical URI components: [//<authority>]<path>[?<query>]
 492     private transient String authority;         // Registry- or server-based
 493
 494     // Server-based authority: [<userInfo>@]<host>[:<port>]
 495     private transient String userInfo;          // null ==> undefined
 496     private transient String host;              // null ==> registry-based
 497     private transient int port = -1;            // -1 ==> undefined
 498
 499     // Remaining components of hierarchical URIs
 500     private transient String path;              // null ==> opaque
 501     private transient String query;             // null ==> undefined
 502
 503     // The remaining fields may be computed on demand, which is safe even in
 504     // the face of multiple threads racing to initialize them
 505     private transient String schemeSpecificPart;
 506     private transient int hash;        // Zero ==> undefined
 507
         // NOTE(review): presumably the decoded forms (escaped octets replaced
         // by the characters they represent) of the like-named components
         // above, filled in on first use -- confirm against the getters.
 508     private transient String decodedUserInfo;
 509     private transient String decodedAuthority;
 510     private transient String decodedPath;
 511     private transient String decodedQuery;
 512     private transient String decodedFragment;
 513     private transient String decodedSchemeSpecificPart;
 514
 515     /**
 516      * The string form of this URI.
 517      *
 518      * @serial
 519      */
 520     private volatile String string;             // The only serializable (non-transient) field
 521 
 522 
 523 
 524     // -- Constructors and factories --
 525 
 526     private URI() { }                           // Used internally; assigns nothing, so fields keep their declared defaults
 527 
 528     /**
 529      * Constructs a URI by parsing the given string.
 530      *
 531      * <p> This constructor parses the given string exactly as specified by the
 532      * grammar in <a
 533      * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
 534      * Appendix&nbsp;A, <b><i>except for the following deviations:</i></b> </p>
 535      *
 536      * <ul>
 537      *
 538      *   <li><p> An empty authority component is permitted as long as it is
 539      *   followed by a non-empty path, a query component, or a fragment
 540      *   component.  This allows the parsing of URIs such as
 541      *   {@code "file:///foo/bar"}, which seems to be the intent of
 542      *   RFC&nbsp;2396 although the grammar does not permit it.  If the
 543      *   authority component is empty then the user-information, host, and port
 544      *   components are undefined. </p></li>
 545      *
 546      *   <li><p> Empty relative paths are permitted; this seems to be the
 547      *   intent of RFC&nbsp;2396 although the grammar does not permit it.  The
 548      *   primary consequence of this deviation is that a standalone fragment
 549      *   such as {@code "#foo"} parses as a relative URI with an empty path
 550      *   and the given fragment, and can be usefully <a
 551      *   href="#resolve-frag">resolved</a> against a base URI. </p></li>
 552      *
 553      *   <li><p> IPv4 addresses in host components are parsed rigorously, as
 554      *   specified by <a
 555      *   href="http://www.ietf.org/rfc/rfc2732.txt">RFC&nbsp;2732</a>: Each
 556      *   element of a dotted-quad address must contain no more than three
 557      *   decimal digits.  Each element is further constrained to have a value
 558      *   no greater than 255. </p></li>
 559      *
 560      *   <li> <p> Hostnames in host components that comprise only a single
 561      *   domain label are permitted to start with an <i>alphanum</i>
 562      *   character. This seems to be the intent of <a
 563      *   href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>
 564      *   section&nbsp;3.2.2 although the grammar does not permit it. The
 565      *   consequence of this deviation is that the authority component of a
 566      *   hierarchical URI such as {@code s://123}, will parse as a server-based
 567      *   authority. </p></li>
 568      *
 569      *   <li><p> IPv6 addresses are permitted for the host component.  An IPv6
 570      *   address must be enclosed in square brackets ({@code '['} and
 571      *   {@code ']'}) as specified by <a
 572      *   href="http://www.ietf.org/rfc/rfc2732.txt">RFC&nbsp;2732</a>.  The
 573      *   IPv6 address itself must parse according to <a
 574      *   href="http://www.ietf.org/rfc/rfc2373.txt">RFC&nbsp;2373</a>.  IPv6
 575      *   addresses are further constrained to describe no more than sixteen
 576      *   bytes of address information, a constraint implicit in RFC&nbsp;2373
 577      *   but not expressible in the grammar. </p></li>
 578      *
 579      *   <li><p> Characters in the <i>other</i> category are permitted wherever
 580      *   RFC&nbsp;2396 permits <i>escaped</i> octets, that is, in the
 581      *   user-information, path, query, and fragment components, as well as in
 582      *   the authority component if the authority is registry-based.  This
 583      *   allows URIs to contain Unicode characters beyond those in the US-ASCII
 584      *   character set. </p></li>
 585      *
 586      * </ul>
 587      *
 588      * @param  str   The string to be parsed into a URI
 589      *
 590      * @throws  NullPointerException
 591      *          If {@code str} is {@code null}
 592      *
 593      * @throws  URISyntaxException
 594      *          If the given string violates RFC&nbsp;2396, as augmented
 595      *          by the above deviations
 596      */
 597     public URI(String str) throws URISyntaxException {
             // All of the work is delegated to Parser; this constructor does
             // nothing else with str.
             // NOTE(review): Parser presumably fills in this instance's
             // component fields, and the 'false' argument presumably means a
             // server-based authority is not required -- confirm against
             // Parser.parse.
 598         new Parser(str).parse(false);
 599     }
 600 
 601     /**
 602      * Constructs a hierarchical URI from the given components.
 603      *
 604      * <p> If a scheme is given then the path, if also given, must either be
 605      * empty or begin with a slash character ({@code '/'}).  Otherwise a
 606      * component of the new URI may be left undefined by passing {@code null}
 607      * for the corresponding parameter or, in the case of the {@code port}
 608      * parameter, by passing {@code -1}.
 609      *
 610      * <p> This constructor first builds a URI string from the given components
 611      * according to the rules specified in <a
 612      * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
 613      * section&nbsp;5.2, step&nbsp;7: </p>
 614      *
 615      * <ol>
 616      *
 617      *   <li><p> Initially, the result string is empty. </p></li>
 618      *
 619      *   <li><p> If a scheme is given then it is appended to the result,
 620      *   followed by a colon character ({@code ':'}).  </p></li>
 621      *
 622      *   <li><p> If user information, a host, or a port are given then the
 623      *   string {@code "//"} is appended.  </p></li>
 624      *
 625      *   <li><p> If user information is given then it is appended, followed by
 626      *   a commercial-at character ({@code '@'}).  Any character not in the
 627      *   <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
 628      *   categories is <a href="#quote">quoted</a>.  </p></li>
 629      *
 630      *   <li><p> If a host is given then it is appended.  If the host is a
 631      *   literal IPv6 address but is not enclosed in square brackets
 632      *   ({@code '['} and {@code ']'}) then the square brackets are added.
 633      *   </p></li>
 634      *
 635      *   <li><p> If a port number is given then a colon character
 636      *   ({@code ':'}) is appended, followed by the port number in decimal.
 637      *   </p></li>
 638      *
 639      *   <li><p> If a path is given then it is appended.  Any character not in
 640      *   the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
 641      *   categories, and not equal to the slash character ({@code '/'}) or the
 642      *   commercial-at character ({@code '@'}), is quoted.  </p></li>
 643      *
 644      *   <li><p> If a query is given then a question-mark character
 645      *   ({@code '?'}) is appended, followed by the query.  Any character that
 646      *   is not a <a href="#legal-chars">legal URI character</a> is quoted.
 647      *   </p></li>
 648      *
 649      *   <li><p> Finally, if a fragment is given then a hash character
 650      *   ({@code '#'}) is appended, followed by the fragment.  Any character
 651      *   that is not a legal URI character is quoted.  </p></li>
 652      *
 653      * </ol>
 654      *
 655      * <p> The resulting URI string is then parsed as if by invoking the {@link
 656      * #URI(String)} constructor and then invoking the {@link
 657      * #parseServerAuthority()} method upon the result; this may cause a {@link
 658      * URISyntaxException} to be thrown.  </p>
 659      *
 660      * @param   scheme    Scheme name
 661      * @param   userInfo  User name and authorization information
 662      * @param   host      Host name
 663      * @param   port      Port number
 664      * @param   path      Path
 665      * @param   query     Query
 666      * @param   fragment  Fragment
 667      *
 668      * @throws URISyntaxException
 669      *         If both a scheme and a path are given but the path is relative,
 670      *         if the URI string constructed from the given components violates
 671      *         RFC&nbsp;2396, or if the authority component of the string is
 672      *         present but cannot be parsed as a server-based authority
 673      */
 674     public URI(String scheme,
 675                String userInfo, String host, int port,
 676                String path, String query, String fragment)
 677         throws URISyntaxException
 678     {
 679         String s = toString(scheme, null,
 680                             null, userInfo, host, port,
 681                             path, query, fragment);
 682         checkPath(s, scheme, path);
 683         new Parser(s).parse(true);
 684     }
 685 
 686     /**
 687      * Constructs a hierarchical URI from the given components.
 688      *
 689      * <p> If a scheme is given then the path, if also given, must either be
 690      * empty or begin with a slash character ({@code '/'}).  Otherwise a
 691      * component of the new URI may be left undefined by passing {@code null}
 692      * for the corresponding parameter.
 693      *
 694      * <p> This constructor first builds a URI string from the given components
 695      * according to the rules specified in <a
 696      * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
 697      * section&nbsp;5.2, step&nbsp;7: </p>
 698      *
 699      * <ol>
 700      *
 701      *   <li><p> Initially, the result string is empty.  </p></li>
 702      *
 703      *   <li><p> If a scheme is given then it is appended to the result,
 704      *   followed by a colon character ({@code ':'}).  </p></li>
 705      *
 706      *   <li><p> If an authority is given then the string {@code "//"} is
 707      *   appended, followed by the authority.  If the authority contains a
 708      *   literal IPv6 address then the address must be enclosed in square
 709      *   brackets ({@code '['} and {@code ']'}).  Any character not in the
 710      *   <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
 711      *   categories, and not equal to the commercial-at character
 712      *   ({@code '@'}), is <a href="#quote">quoted</a>.  </p></li>
 713      *
 714      *   <li><p> If a path is given then it is appended.  Any character not in
 715      *   the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
 716      *   categories, and not equal to the slash character ({@code '/'}) or the
 717      *   commercial-at character ({@code '@'}), is quoted.  </p></li>
 718      *
 719      *   <li><p> If a query is given then a question-mark character
 720      *   ({@code '?'}) is appended, followed by the query.  Any character that
 721      *   is not a <a href="#legal-chars">legal URI character</a> is quoted.
 722      *   </p></li>
 723      *
 724      *   <li><p> Finally, if a fragment is given then a hash character
 725      *   ({@code '#'}) is appended, followed by the fragment.  Any character
 726      *   that is not a legal URI character is quoted.  </p></li>
 727      *
 728      * </ol>
 729      *
 730      * <p> The resulting URI string is then parsed as if by invoking the {@link
 731      * #URI(String)} constructor and then invoking the {@link
 732      * #parseServerAuthority()} method upon the result; this may cause a {@link
 733      * URISyntaxException} to be thrown.  </p>
 734      *
 735      * @param   scheme     Scheme name
 736      * @param   authority  Authority
 737      * @param   path       Path
 738      * @param   query      Query
 739      * @param   fragment   Fragment
 740      *
 741      * @throws URISyntaxException
 742      *         If both a scheme and a path are given but the path is relative,
 743      *         if the URI string constructed from the given components violates
 744      *         RFC&nbsp;2396, or if the authority component of the string is
 745      *         present but cannot be parsed as a server-based authority
 746      */
 747     public URI(String scheme,
 748                String authority,
 749                String path, String query, String fragment)
 750         throws URISyntaxException
 751     {
 752         String s = toString(scheme, null,
 753                             authority, null, null, -1,
 754                             path, query, fragment);
 755         checkPath(s, scheme, path);
 756         new Parser(s).parse(false);
 757     }
 758 
 759     /**
 760      * Constructs a hierarchical URI from the given components.
 761      *
 762      * <p> A component may be left undefined by passing {@code null}.
 763      *
 764      * <p> This convenience constructor works as if by invoking the
 765      * seven-argument constructor as follows:
 766      *
 767      * <blockquote>
 768      * {@code new} {@link #URI(String, String, String, int, String, String, String)
 769      * URI}{@code (scheme, null, host, -1, path, null, fragment);}
 770      * </blockquote>
 771      *
 772      * @param   scheme    Scheme name
 773      * @param   host      Host name
 774      * @param   path      Path
 775      * @param   fragment  Fragment
 776      *
 777      * @throws  URISyntaxException
 778      *          If the URI string constructed from the given components
 779      *          violates RFC&nbsp;2396
 780      */
 781     public URI(String scheme, String host, String path, String fragment)
 782         throws URISyntaxException
 783     {
 784         this(scheme, null, host, -1, path, null, fragment);
 785     }
 786 
 787     /**
 788      * Constructs a URI from the given components.
 789      *
 790      * <p> A component may be left undefined by passing {@code null}.
 791      *
 792      * <p> This constructor first builds a URI in string form using the given
 793      * components as follows:  </p>
 794      *
 795      * <ol>
 796      *
 797      *   <li><p> Initially, the result string is empty.  </p></li>
 798      *
 799      *   <li><p> If a scheme is given then it is appended to the result,
 800      *   followed by a colon character ({@code ':'}).  </p></li>
 801      *
 802      *   <li><p> If a scheme-specific part is given then it is appended.  Any
 803      *   character that is not a <a href="#legal-chars">legal URI character</a>
 804      *   is <a href="#quote">quoted</a>.  </p></li>
 805      *
 806      *   <li><p> Finally, if a fragment is given then a hash character
 807      *   ({@code '#'}) is appended to the string, followed by the fragment.
 808      *   Any character that is not a legal URI character is quoted.  </p></li>
 809      *
 810      * </ol>
 811      *
 812      * <p> The resulting URI string is then parsed in order to create the new
 813      * URI instance as if by invoking the {@link #URI(String)} constructor;
 814      * this may cause a {@link URISyntaxException} to be thrown.  </p>
 815      *
 816      * @param   scheme    Scheme name
 817      * @param   ssp       Scheme-specific part
 818      * @param   fragment  Fragment
 819      *
 820      * @throws  URISyntaxException
 821      *          If the URI string constructed from the given components
 822      *          violates RFC&nbsp;2396
 823      */
 824     public URI(String scheme, String ssp, String fragment)
 825         throws URISyntaxException
 826     {
 827         new Parser(toString(scheme, ssp,
 828                             null, null, null, -1,
 829                             null, null, fragment))
 830             .parse(false);
 831     }
 832 
 833     /**
 834      * Constructs a simple URI consisting of only a scheme and a pre-validated
 835      * path. Provides a fast-path for some internal cases.
 836      */
 837     URI(String scheme, String path) {
 838         assert validSchemeAndPath(scheme, path);
 839         this.scheme = scheme;
 840         this.path = path;
 841     }
 842 
 843     private static boolean validSchemeAndPath(String scheme, String path) {
 844         try {
 845             URI u = new URI(scheme + ":" + path);
 846             return scheme.equals(u.scheme) && path.equals(u.path);
 847         } catch (URISyntaxException e) {
 848             return false;
 849         }
 850     }
 851 
 852     /**
 853      * Creates a URI by parsing the given string.
 854      *
 855      * <p> This convenience factory method works as if by invoking the {@link
 856      * #URI(String)} constructor; any {@link URISyntaxException} thrown by the
 857      * constructor is caught and wrapped in a new {@link
 858      * IllegalArgumentException} object, which is then thrown.
 859      *
     * <p> This method is provided for use in situations where it is known that
     * the given string is a legal URI, for example for URI constants declared
     * within a program, and so it would be considered a programming error
     * for the string not to parse as such.  The constructors, which throw
     * {@link URISyntaxException} directly, should be used in situations where
     * a URI is being constructed from user input or from some other source
     * that may be prone to errors.  </p>
 867      *
 868      * @param  str   The string to be parsed into a URI
 869      * @return The new URI
 870      *
 871      * @throws  NullPointerException
 872      *          If {@code str} is {@code null}
 873      *
 874      * @throws  IllegalArgumentException
 875      *          If the given string violates RFC&nbsp;2396
 876      */
 877     public static URI create(String str) {
 878         try {
 879             return new URI(str);
 880         } catch (URISyntaxException x) {
 881             throw new IllegalArgumentException(x.getMessage(), x);
 882         }
 883     }
 884 
 885 
 886     // -- Operations --
 887 
 888     /**
 889      * Attempts to parse this URI's authority component, if defined, into
 890      * user-information, host, and port components.
 891      *
 892      * <p> If this URI's authority component has already been recognized as
 893      * being server-based then it will already have been parsed into
 894      * user-information, host, and port components.  In this case, or if this
 895      * URI has no authority component, this method simply returns this URI.
 896      *
 897      * <p> Otherwise this method attempts once more to parse the authority
 898      * component into user-information, host, and port components, and throws
 899      * an exception describing why the authority component could not be parsed
 900      * in that way.
 901      *
 902      * <p> This method is provided because the generic URI syntax specified in
 903      * <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>
 904      * cannot always distinguish a malformed server-based authority from a
 905      * legitimate registry-based authority.  It must therefore treat some
 906      * instances of the former as instances of the latter.  The authority
 907      * component in the URI string {@code "//foo:bar"}, for example, is not a
 908      * legal server-based authority but it is legal as a registry-based
 909      * authority.
 910      *
     * <p> In many common situations, for example when working with URIs that
     * are known to be either URNs or URLs, the hierarchical URIs being used
     * will always be server-based.  They therefore must either be parsed as
     * such or treated as an error.  In these cases a statement such as
 915      *
 916      * <blockquote>
 917      * {@code URI }<i>u</i>{@code  = new URI(str).parseServerAuthority();}
 918      * </blockquote>
 919      *
 920      * <p> can be used to ensure that <i>u</i> always refers to a URI that, if
 921      * it has an authority component, has a server-based authority with proper
 922      * user-information, host, and port components.  Invoking this method also
 923      * ensures that if the authority could not be parsed in that way then an
 924      * appropriate diagnostic message can be issued based upon the exception
 925      * that is thrown. </p>
 926      *
 927      * @return  A URI whose authority field has been parsed
 928      *          as a server-based authority
 929      *
 930      * @throws  URISyntaxException
 931      *          If the authority component of this URI is defined
 932      *          but cannot be parsed as a server-based authority
 933      *          according to RFC&nbsp;2396
 934      */
 935     public URI parseServerAuthority()
 936         throws URISyntaxException
 937     {
 938         // We could be clever and cache the error message and index from the
 939         // exception thrown during the original parse, but that would require
 940         // either more fields or a more-obscure representation.
 941         if ((host != null) || (authority == null))
 942             return this;
 943         new Parser(toString()).parse(true);
 944         return this;
 945     }
 946 
 947     /**
 948      * Normalizes this URI's path.
 949      *
 950      * <p> If this URI is opaque, or if its path is already in normal form,
 951      * then this URI is returned.  Otherwise a new URI is constructed that is
 952      * identical to this URI except that its path is computed by normalizing
 953      * this URI's path in a manner consistent with <a
 954      * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
 955      * section&nbsp;5.2, step&nbsp;6, sub-steps&nbsp;c through&nbsp;f; that is:
 956      * </p>
 957      *
 958      * <ol>
 959      *
 960      *   <li><p> All {@code "."} segments are removed. </p></li>
 961      *
 962      *   <li><p> If a {@code ".."} segment is preceded by a non-{@code ".."}
 963      *   segment then both of these segments are removed.  This step is
 964      *   repeated until it is no longer applicable. </p></li>
 965      *
 966      *   <li><p> If the path is relative, and if its first segment contains a
 967      *   colon character ({@code ':'}), then a {@code "."} segment is
 968      *   prepended.  This prevents a relative URI with a path such as
 969      *   {@code "a:b/c/d"} from later being re-parsed as an opaque URI with a
 970      *   scheme of {@code "a"} and a scheme-specific part of {@code "b/c/d"}.
 971      *   <b><i>(Deviation from RFC&nbsp;2396)</i></b> </p></li>
 972      *
 973      * </ol>
 974      *
 975      * <p> A normalized path will begin with one or more {@code ".."} segments
 976      * if there were insufficient non-{@code ".."} segments preceding them to
 977      * allow their removal.  A normalized path will begin with a {@code "."}
 978      * segment if one was inserted by step 3 above.  Otherwise, a normalized
 979      * path will not contain any {@code "."} or {@code ".."} segments. </p>
 980      *
 981      * @return  A URI equivalent to this URI,
 982      *          but whose path is in normal form
 983      */
 984     public URI normalize() {
 985         return normalize(this);
 986     }
 987 
 988     /**
 989      * Resolves the given URI against this URI.
 990      *
 991      * <p> If the given URI is already absolute, or if this URI is opaque, then
 992      * the given URI is returned.
 993      *
 994      * <p><a id="resolve-frag"></a> If the given URI's fragment component is
 995      * defined, its path component is empty, and its scheme, authority, and
 996      * query components are undefined, then a URI with the given fragment but
 997      * with all other components equal to those of this URI is returned.  This
 998      * allows a URI representing a standalone fragment reference, such as
 999      * {@code "#foo"}, to be usefully resolved against a base URI.
1000      *
1001      * <p> Otherwise this method constructs a new hierarchical URI in a manner
1002      * consistent with <a
1003      * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
1004      * section&nbsp;5.2; that is: </p>
1005      *
1006      * <ol>
1007      *
1008      *   <li><p> A new URI is constructed with this URI's scheme and the given
1009      *   URI's query and fragment components. </p></li>
1010      *
1011      *   <li><p> If the given URI has an authority component then the new URI's
1012      *   authority and path are taken from the given URI. </p></li>
1013      *
1014      *   <li><p> Otherwise the new URI's authority component is copied from
1015      *   this URI, and its path is computed as follows: </p>
1016      *
1017      *   <ol>
1018      *
1019      *     <li><p> If the given URI's path is absolute then the new URI's path
1020      *     is taken from the given URI. </p></li>
1021      *
1022      *     <li><p> Otherwise the given URI's path is relative, and so the new
1023      *     URI's path is computed by resolving the path of the given URI
1024      *     against the path of this URI.  This is done by concatenating all but
1025      *     the last segment of this URI's path, if any, with the given URI's
1026      *     path and then normalizing the result as if by invoking the {@link
1027      *     #normalize() normalize} method. </p></li>
1028      *
1029      *   </ol></li>
1030      *
1031      * </ol>
1032      *
1033      * <p> The result of this method is absolute if, and only if, either this
1034      * URI is absolute or the given URI is absolute.  </p>
1035      *
1036      * @param  uri  The URI to be resolved against this URI
1037      * @return The resulting URI
1038      *
1039      * @throws  NullPointerException
1040      *          If {@code uri} is {@code null}
1041      */
1042     public URI resolve(URI uri) {
1043         return resolve(this, uri);
1044     }
1045 
1046     /**
1047      * Constructs a new URI by parsing the given string and then resolving it
1048      * against this URI.
1049      *
1050      * <p> This convenience method works as if invoking it were equivalent to
1051      * evaluating the expression {@link #resolve(java.net.URI)
1052      * resolve}{@code (URI.}{@link #create(String) create}{@code (str))}. </p>
1053      *
1054      * @param  str   The string to be parsed into a URI
1055      * @return The resulting URI
1056      *
1057      * @throws  NullPointerException
1058      *          If {@code str} is {@code null}
1059      *
1060      * @throws  IllegalArgumentException
1061      *          If the given string violates RFC&nbsp;2396
1062      */
1063     public URI resolve(String str) {
1064         return resolve(URI.create(str));
1065     }
1066 
1067     /**
1068      * Relativizes the given URI against this URI.
1069      *
1070      * <p> The relativization of the given URI against this URI is computed as
1071      * follows: </p>
1072      *
1073      * <ol>
1074      *
     *   <li><p> If either this URI or the given URI is opaque, or if the
     *   scheme and authority components of the two URIs are not identical, or
     *   if the path of this URI is not a prefix of the path of the given URI,
     *   then the given URI is returned. </p></li>
1079      *
1080      *   <li><p> Otherwise a new relative hierarchical URI is constructed with
1081      *   query and fragment components taken from the given URI and with a path
1082      *   component computed by removing this URI's path from the beginning of
1083      *   the given URI's path. </p></li>
1084      *
1085      * </ol>
1086      *
1087      * @param  uri  The URI to be relativized against this URI
1088      * @return The resulting URI
1089      *
1090      * @throws  NullPointerException
1091      *          If {@code uri} is {@code null}
1092      */
1093     public URI relativize(URI uri) {
1094         return relativize(this, uri);
1095     }
1096 
1097     /**
1098      * Constructs a URL from this URI.
1099      *
1100      * <p> This convenience method works as if invoking it were equivalent to
1101      * evaluating the expression {@code new URL(this.toString())} after
1102      * first checking that this URI is absolute. </p>
1103      *
1104      * @return  A URL constructed from this URI
1105      *
     * @throws  IllegalArgumentException
     *          If this URI is not absolute
1108      *
1109      * @throws  MalformedURLException
1110      *          If a protocol handler for the URL could not be found,
1111      *          or if some other error occurred while constructing the URL
1112      */
1113     public URL toURL() throws MalformedURLException {
1114         return URL.fromURI(this);
1115     }
1116 
1117     // -- Component access methods --
1118 
1119     /**
1120      * Returns the scheme component of this URI.
1121      *
     * <p> The scheme component of a URI, if defined, only contains characters
     * in the <i>alphanum</i> category and in the string {@code "-.+"}.  A
     * scheme always starts with an <i>alpha</i> character. </p>
     *
     * <p> The scheme component of a URI cannot contain escaped octets, hence
     * this method does not perform any decoding. </p>
1128      *
1129      * @return  The scheme component of this URI,
1130      *          or {@code null} if the scheme is undefined
1131      */
1132     public String getScheme() {
1133         return scheme;
1134     }
1135 
1136     /**
1137      * Tells whether or not this URI is absolute.
1138      *
1139      * <p> A URI is absolute if, and only if, it has a scheme component. </p>
1140      *
1141      * @return  {@code true} if, and only if, this URI is absolute
1142      */
1143     public boolean isAbsolute() {
1144         return scheme != null;
1145     }
1146 
1147     /**
1148      * Tells whether or not this URI is opaque.
1149      *
     * <p> A URI is opaque if, and only if, it is absolute and its
     * scheme-specific part does not begin with a slash character
     * ({@code '/'}).  An opaque URI has a scheme, a scheme-specific part, and
     * possibly a fragment; all other components are undefined. </p>
1154      *
1155      * @return  {@code true} if, and only if, this URI is opaque
1156      */
1157     public boolean isOpaque() {
1158         return path == null;
1159     }
1160 
1161     /**
1162      * Returns the raw scheme-specific part of this URI.  The scheme-specific
1163      * part is never undefined, though it may be empty.
1164      *
1165      * <p> The scheme-specific part of a URI only contains legal URI
1166      * characters. </p>
1167      *
1168      * @return  The raw scheme-specific part of this URI
1169      *          (never {@code null})
1170      */
1171     public String getRawSchemeSpecificPart() {
1172         String part = schemeSpecificPart;
1173         if (part != null) {
1174             return part;
1175         }
1176 
1177         String s = string;
1178         if (s != null) {
1179             // if string is defined, components will have been parsed
1180             int start = 0;
1181             int end = s.length();
1182             if (scheme != null) {
1183                 start = scheme.length() + 1;
1184             }
1185             if (fragment != null) {
1186                 end -= fragment.length() + 1;
1187             }
1188             if (path != null && path.length() == end - start) {
1189                 part = path;
1190             } else {
1191                 part = s.substring(start, end);
1192             }
1193         } else {
1194             StringBuilder sb = new StringBuilder();
1195             appendSchemeSpecificPart(sb, null, getAuthority(), getUserInfo(),
1196                                  host, port, getPath(), getQuery());
1197             part = sb.toString();
1198         }
1199         return schemeSpecificPart = part;
1200     }
1201 
1202     /**
1203      * Returns the decoded scheme-specific part of this URI.
1204      *
1205      * <p> The string returned by this method is equal to that returned by the
1206      * {@link #getRawSchemeSpecificPart() getRawSchemeSpecificPart} method
1207      * except that all sequences of escaped octets are <a
1208      * href="#decode">decoded</a>.  </p>
1209      *
1210      * @return  The decoded scheme-specific part of this URI
1211      *          (never {@code null})
1212      */
1213     public String getSchemeSpecificPart() {
1214         String part = decodedSchemeSpecificPart;
1215         if (part == null) {
1216             decodedSchemeSpecificPart = part = decode(getRawSchemeSpecificPart());
1217         }
1218         return part;
1219     }
1220 
1221     /**
1222      * Returns the raw authority component of this URI.
1223      *
1224      * <p> The authority component of a URI, if defined, only contains the
1225      * commercial-at character ({@code '@'}) and characters in the
1226      * <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, and <i>other</i>
1227      * categories.  If the authority is server-based then it is further
1228      * constrained to have valid user-information, host, and port
1229      * components. </p>
1230      *
1231      * @return  The raw authority component of this URI,
1232      *          or {@code null} if the authority is undefined
1233      */
1234     public String getRawAuthority() {
1235         return authority;
1236     }
1237 
1238     /**
1239      * Returns the decoded authority component of this URI.
1240      *
1241      * <p> The string returned by this method is equal to that returned by the
1242      * {@link #getRawAuthority() getRawAuthority} method except that all
1243      * sequences of escaped octets are <a href="#decode">decoded</a>.  </p>
1244      *
1245      * @return  The decoded authority component of this URI,
1246      *          or {@code null} if the authority is undefined
1247      */
1248     public String getAuthority() {
1249         String auth = decodedAuthority;
1250         if ((auth == null) && (authority != null)) {
1251             decodedAuthority = auth = decode(authority);
1252         }
1253         return auth;
1254     }
1255 
1256     /**
1257      * Returns the raw user-information component of this URI.
1258      *
1259      * <p> The user-information component of a URI, if defined, only contains
1260      * characters in the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, and
1261      * <i>other</i> categories. </p>
1262      *
1263      * @return  The raw user-information component of this URI,
1264      *          or {@code null} if the user information is undefined
1265      */
1266     public String getRawUserInfo() {
1267         return userInfo;
1268     }
1269 
1270     /**
1271      * Returns the decoded user-information component of this URI.
1272      *
1273      * <p> The string returned by this method is equal to that returned by the
1274      * {@link #getRawUserInfo() getRawUserInfo} method except that all
1275      * sequences of escaped octets are <a href="#decode">decoded</a>.  </p>
1276      *
1277      * @return  The decoded user-information component of this URI,
1278      *          or {@code null} if the user information is undefined
1279      */
1280     public String getUserInfo() {
1281         String user = decodedUserInfo;
1282         if ((user == null) && (userInfo != null)) {
1283             decodedUserInfo = user = decode(userInfo);
1284         }
1285         return user;
1286     }
1287 
1288     /**
1289      * Returns the host component of this URI.
1290      *
1291      * <p> The host component of a URI, if defined, will have one of the
1292      * following forms: </p>
1293      *
1294      * <ul>
1295      *
1296      *   <li><p> A domain name consisting of one or more <i>labels</i>
1297      *   separated by period characters ({@code '.'}), optionally followed by
1298      *   a period character.  Each label consists of <i>alphanum</i> characters
1299      *   as well as hyphen characters ({@code '-'}), though hyphens never
     *   occur as the first or last characters in a label. The rightmost
     *   label of a domain name consisting of two or more labels begins
     *   with an <i>alpha</i> character. </p></li>
1303      *
1304      *   <li><p> A dotted-quad IPv4 address of the form
1305      *   <i>digit</i>{@code +.}<i>digit</i>{@code +.}<i>digit</i>{@code +.}<i>digit</i>{@code +},
1306      *   where no <i>digit</i> sequence is longer than three characters and no
1307      *   sequence has a value larger than 255. </p></li>
1308      *
1309      *   <li><p> An IPv6 address enclosed in square brackets ({@code '['} and
1310      *   {@code ']'}) and consisting of hexadecimal digits, colon characters
1311      *   ({@code ':'}), and possibly an embedded IPv4 address.  The full
1312      *   syntax of IPv6 addresses is specified in <a
1313      *   href="http://www.ietf.org/rfc/rfc2373.txt"><i>RFC&nbsp;2373: IPv6
1314      *   Addressing Architecture</i></a>.  </p></li>
1315      *
1316      * </ul>
1317      *
1318      * The host component of a URI cannot contain escaped octets, hence this
1319      * method does not perform any decoding.
1320      *
1321      * @return  The host component of this URI,
1322      *          or {@code null} if the host is undefined
1323      */
1324     public String getHost() {
1325         return host;
1326     }
1327 
1328     /**
1329      * Returns the port number of this URI.
1330      *
1331      * <p> The port component of a URI, if defined, is a non-negative
1332      * integer. </p>
1333      *
1334      * @return  The port component of this URI,
1335      *          or {@code -1} if the port is undefined
1336      */
1337     public int getPort() {
1338         return port;
1339     }
1340 
1341     /**
1342      * Returns the raw path component of this URI.
1343      *
1344      * <p> The path component of a URI, if defined, only contains the slash
1345      * character ({@code '/'}), the commercial-at character ({@code '@'}),
1346      * and characters in the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>,
1347      * and <i>other</i> categories. </p>
1348      *
1349      * @return  The path component of this URI,
1350      *          or {@code null} if the path is undefined
1351      */
1352     public String getRawPath() {
1353         return path;
1354     }
1355 
1356     /**
1357      * Returns the decoded path component of this URI.
1358      *
1359      * <p> The string returned by this method is equal to that returned by the
1360      * {@link #getRawPath() getRawPath} method except that all sequences of
1361      * escaped octets are <a href="#decode">decoded</a>.  </p>
1362      *
1363      * @return  The decoded path component of this URI,
1364      *          or {@code null} if the path is undefined
1365      */
1366     public String getPath() {
1367         String decoded = decodedPath;
1368         if ((decoded == null) && (path != null)) {
1369             decodedPath = decoded = decode(path);
1370         }
1371         return decoded;
1372     }
1373 
1374     /**
1375      * Returns the raw query component of this URI.
1376      *
1377      * <p> The query component of a URI, if defined, only contains legal URI
1378      * characters. </p>
1379      *
1380      * @return  The raw query component of this URI,
1381      *          or {@code null} if the query is undefined
1382      */
1383     public String getRawQuery() {
1384         return query;
1385     }
1386 
1387     /**
1388      * Returns the decoded query component of this URI.
1389      *
1390      * <p> The string returned by this method is equal to that returned by the
1391      * {@link #getRawQuery() getRawQuery} method except that all sequences of
1392      * escaped octets are <a href="#decode">decoded</a>.  </p>
1393      *
1394      * @return  The decoded query component of this URI,
1395      *          or {@code null} if the query is undefined
1396      */
1397     public String getQuery() {
1398         String decoded = decodedQuery;
1399         if ((decoded == null) && (query != null)) {
1400             decodedQuery = decoded = decode(query, false);
1401         }
1402         return decoded;
1403     }
1404 
1405     /**
1406      * Returns the raw fragment component of this URI.
1407      *
1408      * <p> The fragment component of a URI, if defined, only contains legal URI
1409      * characters. </p>
1410      *
1411      * @return  The raw fragment component of this URI,
1412      *          or {@code null} if the fragment is undefined
1413      */
1414     public String getRawFragment() {
1415         return fragment;
1416     }
1417 
1418     /**
1419      * Returns the decoded fragment component of this URI.
1420      *
1421      * <p> The string returned by this method is equal to that returned by the
1422      * {@link #getRawFragment() getRawFragment} method except that all
1423      * sequences of escaped octets are <a href="#decode">decoded</a>.  </p>
1424      *
1425      * @return  The decoded fragment component of this URI,
1426      *          or {@code null} if the fragment is undefined
1427      */
1428     public String getFragment() {
1429         String decoded = decodedFragment;
1430         if ((decoded == null) && (fragment != null)) {
1431             decodedFragment = decoded = decode(fragment, false);
1432         }
1433         return decoded;
1434     }
1435 
1436 
1437     // -- Equality, comparison, hash code, toString, and serialization --
1438 
1439     /**
1440      * Tests this URI for equality with another object.
1441      *
1442      * <p> If the given object is not a URI then this method immediately
1443      * returns {@code false}.
1444      *
1445      * <p> For two URIs to be considered equal requires that either both are
1446      * opaque or both are hierarchical.  Their schemes must either both be
1447      * undefined or else be equal without regard to case. Their fragments
1448      * must either both be undefined or else be equal.
1449      *
1450      * <p> For two opaque URIs to be considered equal, their scheme-specific
1451      * parts must be equal.
1452      *
1453      * <p> For two hierarchical URIs to be considered equal, their paths must
1454      * be equal and their queries must either both be undefined or else be
1455      * equal.  Their authorities must either both be undefined, or both be
1456      * registry-based, or both be server-based.  If their authorities are
1457      * defined and are registry-based, then they must be equal.  If their
1458      * authorities are defined and are server-based, then their hosts must be
1459      * equal without regard to case, their port numbers must be equal, and
1460      * their user-information components must be equal.
1461      *
     * <p> When testing the user-information, path, query, fragment, authority,
     * or scheme-specific parts of two URIs for equality, the raw forms rather
     * than the decoded forms of these components are compared and the
     * hexadecimal digits of escaped octets are compared without regard to
     * case.
1467      *
1468      * <p> This method satisfies the general contract of the {@link
1469      * java.lang.Object#equals(Object) Object.equals} method. </p>
1470      *
1471      * @param   ob   The object to which this object is to be compared
1472      *
1473      * @return  {@code true} if, and only if, the given object is a URI that
1474      *          is identical to this URI
1475      */
1476     public boolean equals(Object ob) {
1477         if (ob == this)
1478             return true;
1479         if (!(ob instanceof URI))
1480             return false;
1481         URI that = (URI)ob;
1482         if (this.isOpaque() != that.isOpaque()) return false;
1483         if (!equalIgnoringCase(this.scheme, that.scheme)) return false;
1484         if (!equal(this.fragment, that.fragment)) return false;
1485 
1486         // Opaque
1487         if (this.isOpaque())
1488             return equal(this.schemeSpecificPart, that.schemeSpecificPart);
1489 
1490         // Hierarchical
1491         if (!equal(this.path, that.path)) return false;
1492         if (!equal(this.query, that.query)) return false;
1493 
1494         // Authorities
1495         if (this.authority == that.authority) return true;
1496         if (this.host != null) {
1497             // Server-based
1498             if (!equal(this.userInfo, that.userInfo)) return false;
1499             if (!equalIgnoringCase(this.host, that.host)) return false;
1500             if (this.port != that.port) return false;
1501         } else if (this.authority != null) {
1502             // Registry-based
1503             if (!equal(this.authority, that.authority)) return false;
1504         } else if (this.authority != that.authority) {
1505             return false;
1506         }
1507 
1508         return true;
1509     }
1510 
1511     /**
1512      * Returns a hash-code value for this URI.  The hash code is based upon all
1513      * of the URI's components, and satisfies the general contract of the
1514      * {@link java.lang.Object#hashCode() Object.hashCode} method.
1515      *
1516      * @return  A hash-code value for this URI
1517      */
1518     public int hashCode() {
1519         int h = hash;
1520         if (h == 0) {
1521             h = hashIgnoringCase(0, scheme);
1522             h = hash(h, fragment);
1523             if (isOpaque()) {
1524                 h = hash(h, schemeSpecificPart);
1525             } else {
1526                 h = hash(h, path);
1527                 h = hash(h, query);
1528                 if (host != null) {
1529                     h = hash(h, userInfo);
1530                     h = hashIgnoringCase(h, host);
1531                     h += 1949 * port;
1532                 } else {
1533                     h = hash(h, authority);
1534                 }
1535             }
1536             if (h != 0) {
1537                 hash = h;
1538             }
1539         }
1540         return h;
1541     }
1542 
1543     /**
1544      * Compares this URI to another object, which must be a URI.
1545      *
1546      * <p> When comparing corresponding components of two URIs, if one
1547      * component is undefined but the other is defined then the first is
1548      * considered to be less than the second.  Unless otherwise noted, string
1549      * components are ordered according to their natural, case-sensitive
1550      * ordering as defined by the {@link java.lang.String#compareTo(Object)
     * String.compareTo} method.  String components that are subject to
     * encoding are compared by comparing their raw forms rather than their
     * decoded forms.
1554      *
1555      * <p> The ordering of URIs is defined as follows: </p>
1556      *
1557      * <ul>
1558      *
     *   <li><p> Two URIs with different schemes are ordered according to the
     *   ordering of their schemes, without regard to case. </p></li>
1561      *
1562      *   <li><p> A hierarchical URI is considered to be less than an opaque URI
1563      *   with an identical scheme. </p></li>
1564      *
1565      *   <li><p> Two opaque URIs with identical schemes are ordered according
1566      *   to the ordering of their scheme-specific parts. </p></li>
1567      *
1568      *   <li><p> Two opaque URIs with identical schemes and scheme-specific
1569      *   parts are ordered according to the ordering of their
1570      *   fragments. </p></li>
1571      *
1572      *   <li><p> Two hierarchical URIs with identical schemes are ordered
1573      *   according to the ordering of their authority components: </p>
1574      *
1575      *   <ul>
1576      *
1577      *     <li><p> If both authority components are server-based then the URIs
1578      *     are ordered according to their user-information components; if these
1579      *     components are identical then the URIs are ordered according to the
1580      *     ordering of their hosts, without regard to case; if the hosts are
1581      *     identical then the URIs are ordered according to the ordering of
1582      *     their ports. </p></li>
1583      *
1584      *     <li><p> If one or both authority components are registry-based then
1585      *     the URIs are ordered according to the ordering of their authority
1586      *     components. </p></li>
1587      *
1588      *   </ul></li>
1589      *
1590      *   <li><p> Finally, two hierarchical URIs with identical schemes and
1591      *   authority components are ordered according to the ordering of their
1592      *   paths; if their paths are identical then they are ordered according to
1593      *   the ordering of their queries; if the queries are identical then they
1594      *   are ordered according to the order of their fragments. </p></li>
1595      *
1596      * </ul>
1597      *
1598      * <p> This method satisfies the general contract of the {@link
1599      * java.lang.Comparable#compareTo(Object) Comparable.compareTo}
1600      * method. </p>
1601      *
1602      * @param   that
1603      *          The object to which this URI is to be compared
1604      *
1605      * @return  A negative integer, zero, or a positive integer as this URI is
1606      *          less than, equal to, or greater than the given URI
1607      *
1608      * @throws  ClassCastException
1609      *          If the given object is not a URI
1610      */
1611     public int compareTo(URI that) {
1612         int c;
1613 
1614         if ((c = compareIgnoringCase(this.scheme, that.scheme)) != 0)
1615             return c;
1616 
1617         if (this.isOpaque()) {
1618             if (that.isOpaque()) {
1619                 // Both opaque
1620                 if ((c = compare(this.schemeSpecificPart,
1621                                  that.schemeSpecificPart)) != 0)
1622                     return c;
1623                 return compare(this.fragment, that.fragment);
1624             }
1625             return +1;                  // Opaque > hierarchical
1626         } else if (that.isOpaque()) {
1627             return -1;                  // Hierarchical < opaque
1628         }
1629 
1630         // Hierarchical
1631         if ((this.host != null) && (that.host != null)) {
1632             // Both server-based
1633             if ((c = compare(this.userInfo, that.userInfo)) != 0)
1634                 return c;
1635             if ((c = compareIgnoringCase(this.host, that.host)) != 0)
1636                 return c;
1637             if ((c = this.port - that.port) != 0)
1638                 return c;
1639         } else {
1640             // If one or both authorities are registry-based then we simply
1641             // compare them in the usual, case-sensitive way.  If one is
1642             // registry-based and one is server-based then the strings are
1643             // guaranteed to be unequal, hence the comparison will never return
1644             // zero and the compareTo and equals methods will remain
1645             // consistent.
1646             if ((c = compare(this.authority, that.authority)) != 0) return c;
1647         }
1648 
1649         if ((c = compare(this.path, that.path)) != 0) return c;
1650         if ((c = compare(this.query, that.query)) != 0) return c;
1651         return compare(this.fragment, that.fragment);
1652     }
1653 
1654     /**
1655      * Returns the content of this URI as a string.
1656      *
1657      * <p> If this URI was created by invoking one of the constructors in this
1658      * class then a string equivalent to the original input string, or to the
1659      * string computed from the originally-given components, as appropriate, is
1660      * returned.  Otherwise this URI was created by normalization, resolution,
1661      * or relativization, and so a string is constructed from this URI's
1662      * components according to the rules specified in <a
1663      * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
1664      * section&nbsp;5.2, step&nbsp;7. </p>
1665      *
1666      * @return  The string form of this URI
1667      */
1668     public String toString() {
1669         String s = string;
1670         if (s == null) {
1671             s = defineString();
1672         }
1673         return s;
1674     }
1675 
1676     private String defineString() {
1677         String s = string;
1678         if (s != null) {
1679             return s;
1680         }
1681 
1682         StringBuilder sb = new StringBuilder();
1683         if (scheme != null) {
1684             sb.append(scheme);
1685             sb.append(':');
1686         }
1687         if (isOpaque()) {
1688             sb.append(schemeSpecificPart);
1689         } else {
1690             if (host != null) {
1691                 sb.append("//");
1692                 if (userInfo != null) {
1693                     sb.append(userInfo);
1694                     sb.append('@');
1695                 }
1696                 boolean needBrackets = ((host.indexOf(':') >= 0)
1697                         && !host.startsWith("[")
1698                         && !host.endsWith("]"));
1699                 if (needBrackets) sb.append('[');
1700                 sb.append(host);
1701                 if (needBrackets) sb.append(']');
1702                 if (port != -1) {
1703                     sb.append(':');
1704                     sb.append(port);
1705                 }
1706             } else if (authority != null) {
1707                 sb.append("//");
1708                 sb.append(authority);
1709             }
1710             if (path != null)
1711                 sb.append(path);
1712             if (query != null) {
1713                 sb.append('?');
1714                 sb.append(query);
1715             }
1716         }
1717         if (fragment != null) {
1718             sb.append('#');
1719             sb.append(fragment);
1720         }
1721         return string = sb.toString();
1722     }
1723 
1724     /**
1725      * Returns the content of this URI as a US-ASCII string.
1726      *
1727      * <p> If this URI does not contain any characters in the <i>other</i>
1728      * category then an invocation of this method will return the same value as
1729      * an invocation of the {@link #toString() toString} method.  Otherwise
1730      * this method works as if by invoking that method and then <a
1731      * href="#encode">encoding</a> the result.  </p>
1732      *
1733      * @return  The string form of this URI, encoded as needed
1734      *          so that it only contains characters in the US-ASCII
1735      *          charset
1736      */
1737     public String toASCIIString() {
1738         return encode(toString());
1739     }
1740 
1741 
1742     // -- Serialization support --
1743 
1744     /**
1745      * Saves the content of this URI to the given serial stream.
1746      *
1747      * <p> The only serializable field of a URI instance is its {@code string}
1748      * field.  That field is given a value, if it does not have one already,
1749      * and then the {@link java.io.ObjectOutputStream#defaultWriteObject()}
1750      * method of the given object-output stream is invoked. </p>
1751      *
1752      * @param  os  The object-output stream to which this object
1753      *             is to be written
1754      */
1755     private void writeObject(ObjectOutputStream os)
1756         throws IOException
1757     {
1758         defineString();
1759         os.defaultWriteObject();        // Writes the string field only
1760     }
1761 
1762     /**
1763      * Reconstitutes a URI from the given serial stream.
1764      *
1765      * <p> The {@link java.io.ObjectInputStream#defaultReadObject()} method is
1766      * invoked to read the value of the {@code string} field.  The result is
1767      * then parsed in the usual way.
1768      *
1769      * @param  is  The object-input stream from which this object
1770      *             is being read
1771      */
1772     private void readObject(ObjectInputStream is)
1773         throws ClassNotFoundException, IOException
1774     {
1775         port = -1;                      // Argh
1776         is.defaultReadObject();
1777         try {
1778             new Parser(string).parse(false);
1779         } catch (URISyntaxException x) {
1780             IOException y = new InvalidObjectException("Invalid URI");
1781             y.initCause(x);
1782             throw y;
1783         }
1784     }
1785 
1786 
1787     // -- End of public methods --
1788 
1789 
1790     // -- Utility methods for string-field comparison and hashing --
1791 
1792     // These methods return appropriate values for null string arguments,
1793     // thereby simplifying the equals, hashCode, and compareTo methods.
1794     //
1795     // The case-ignoring methods should only be applied to strings whose
1796     // characters are all known to be US-ASCII.  Because of this restriction,
1797     // these methods are faster than the similar methods in the String class.
1798 
1799     // US-ASCII only
1800     private static int toLower(char c) {
1801         if ((c >= 'A') && (c <= 'Z'))
1802             return c + ('a' - 'A');
1803         return c;
1804     }
1805 
1806     // US-ASCII only
1807     private static int toUpper(char c) {
1808         if ((c >= 'a') && (c <= 'z'))
1809             return c - ('a' - 'A');
1810         return c;
1811     }
1812 
1813     private static boolean equal(String s, String t) {
1814         if (s == t) return true;
1815         if ((s != null) && (t != null)) {
1816             if (s.length() != t.length())
1817                 return false;
1818             if (s.indexOf('%') < 0)
1819                 return s.equals(t);
1820             int n = s.length();
1821             for (int i = 0; i < n;) {
1822                 char c = s.charAt(i);
1823                 char d = t.charAt(i);
1824                 if (c != '%') {
1825                     if (c != d)
1826                         return false;
1827                     i++;
1828                     continue;
1829                 }
1830                 if (d != '%')
1831                     return false;
1832                 i++;
1833                 if (toLower(s.charAt(i)) != toLower(t.charAt(i)))
1834                     return false;
1835                 i++;
1836                 if (toLower(s.charAt(i)) != toLower(t.charAt(i)))
1837                     return false;
1838                 i++;
1839             }
1840             return true;
1841         }
1842         return false;
1843     }
1844 
1845     // US-ASCII only
1846     private static boolean equalIgnoringCase(String s, String t) {
1847         if (s == t) return true;
1848         if ((s != null) && (t != null)) {
1849             int n = s.length();
1850             if (t.length() != n)
1851                 return false;
1852             for (int i = 0; i < n; i++) {
1853                 if (toLower(s.charAt(i)) != toLower(t.charAt(i)))
1854                     return false;
1855             }
1856             return true;
1857         }
1858         return false;
1859     }
1860 
1861     private static int hash(int hash, String s) {
1862         if (s == null) return hash;
1863         return s.indexOf('%') < 0 ? hash * 127 + s.hashCode()
1864                                   : normalizedHash(hash, s);
1865     }
1866 
1867 
1868     private static int normalizedHash(int hash, String s) {
1869         int h = 0;
1870         for (int index = 0; index < s.length(); index++) {
1871             char ch = s.charAt(index);
1872             h = 31 * h + ch;
1873             if (ch == '%') {
1874                 /*
1875                  * Process the next two encoded characters
1876                  */
1877                 for (int i = index + 1; i < index + 3; i++)
1878                     h = 31 * h + toUpper(s.charAt(i));
1879                 index += 2;
1880             }
1881         }
1882         return hash * 127 + h;
1883     }
1884 
1885     // US-ASCII only
1886     private static int hashIgnoringCase(int hash, String s) {
1887         if (s == null) return hash;
1888         int h = hash;
1889         int n = s.length();
1890         for (int i = 0; i < n; i++)
1891             h = 31 * h + toLower(s.charAt(i));
1892         return h;
1893     }
1894 
1895     private static int compare(String s, String t) {
1896         if (s == t) return 0;
1897         if (s != null) {
1898             if (t != null)
1899                 return s.compareTo(t);
1900             else
1901                 return +1;
1902         } else {
1903             return -1;
1904         }
1905     }
1906 
1907     // US-ASCII only
1908     private static int compareIgnoringCase(String s, String t) {
1909         if (s == t) return 0;
1910         if (s != null) {
1911             if (t != null) {
1912                 int sn = s.length();
1913                 int tn = t.length();
1914                 int n = sn < tn ? sn : tn;
1915                 for (int i = 0; i < n; i++) {
1916                     int c = toLower(s.charAt(i)) - toLower(t.charAt(i));
1917                     if (c != 0)
1918                         return c;
1919                 }
1920                 return sn - tn;
1921             }
1922             return +1;
1923         } else {
1924             return -1;
1925         }
1926     }
1927 
1928 
1929     // -- String construction --
1930 
1931     // If a scheme is given then the path, if given, must be absolute
1932     //
1933     private static void checkPath(String s, String scheme, String path)
1934         throws URISyntaxException
1935     {
1936         if (scheme != null) {
1937             if ((path != null)
1938                 && ((path.length() > 0) && (path.charAt(0) != '/')))
1939                 throw new URISyntaxException(s,
1940                                              "Relative path in absolute URI");
1941         }
1942     }
1943 
1944     private void appendAuthority(StringBuilder sb,
1945                                  String authority,
1946                                  String userInfo,
1947                                  String host,
1948                                  int port)
1949     {
1950         if (host != null) {
1951             sb.append("//");
1952             if (userInfo != null) {
1953                 sb.append(quote(userInfo, L_USERINFO, H_USERINFO));
1954                 sb.append('@');
1955             }
1956             boolean needBrackets = ((host.indexOf(':') >= 0)
1957                                     && !host.startsWith("[")
1958                                     && !host.endsWith("]"));
1959             if (needBrackets) sb.append('[');
1960             sb.append(host);
1961             if (needBrackets) sb.append(']');
1962             if (port != -1) {
1963                 sb.append(':');
1964                 sb.append(port);
1965             }
1966         } else if (authority != null) {
1967             sb.append("//");
1968             if (authority.startsWith("[")) {
1969                 // authority should (but may not) contain an embedded IPv6 address
1970                 int end = authority.indexOf(']');
1971                 String doquote = authority, dontquote = "";
1972                 if (end != -1 && authority.indexOf(':') != -1) {
1973                     // the authority contains an IPv6 address
1974                     if (end == authority.length()) {
1975                         dontquote = authority;
1976                         doquote = "";
1977                     } else {
1978                         dontquote = authority.substring(0 , end + 1);
1979                         doquote = authority.substring(end + 1);
1980                     }
1981                 }
1982                 sb.append(dontquote);
1983                 sb.append(quote(doquote,
1984                             L_REG_NAME | L_SERVER,
1985                             H_REG_NAME | H_SERVER));
1986             } else {
1987                 sb.append(quote(authority,
1988                             L_REG_NAME | L_SERVER,
1989                             H_REG_NAME | H_SERVER));
1990             }
1991         }
1992     }
1993 
1994     private void appendSchemeSpecificPart(StringBuilder sb,
1995                                           String opaquePart,
1996                                           String authority,
1997                                           String userInfo,
1998                                           String host,
1999                                           int port,
2000                                           String path,
2001                                           String query)
2002     {
2003         if (opaquePart != null) {
2004             /* check if SSP begins with an IPv6 address
2005              * because we must not quote a literal IPv6 address
2006              */
2007             if (opaquePart.startsWith("//[")) {
2008                 int end =  opaquePart.indexOf(']');
2009                 if (end != -1 && opaquePart.indexOf(':')!=-1) {
2010                     String doquote, dontquote;
2011                     if (end == opaquePart.length()) {
2012                         dontquote = opaquePart;
2013                         doquote = "";
2014                     } else {
2015                         dontquote = opaquePart.substring(0,end+1);
2016                         doquote = opaquePart.substring(end+1);
2017                     }
2018                     sb.append (dontquote);
2019                     sb.append(quote(doquote, L_URIC, H_URIC));
2020                 }
2021             } else {
2022                 sb.append(quote(opaquePart, L_URIC, H_URIC));
2023             }
2024         } else {
2025             appendAuthority(sb, authority, userInfo, host, port);
2026             if (path != null)
2027                 sb.append(quote(path, L_PATH, H_PATH));
2028             if (query != null) {
2029                 sb.append('?');
2030                 sb.append(quote(query, L_URIC, H_URIC));
2031             }
2032         }
2033     }
2034 
2035     private void appendFragment(StringBuilder sb, String fragment) {
2036         if (fragment != null) {
2037             sb.append('#');
2038             sb.append(quote(fragment, L_URIC, H_URIC));
2039         }
2040     }
2041 
2042     private String toString(String scheme,
2043                             String opaquePart,
2044                             String authority,
2045                             String userInfo,
2046                             String host,
2047                             int port,
2048                             String path,
2049                             String query,
2050                             String fragment)
2051     {
2052         StringBuilder sb = new StringBuilder();
2053         if (scheme != null) {
2054             sb.append(scheme);
2055             sb.append(':');
2056         }
2057         appendSchemeSpecificPart(sb, opaquePart,
2058                                  authority, userInfo, host, port,
2059                                  path, query);
2060         appendFragment(sb, fragment);
2061         return sb.toString();
2062     }
2063 
2064     // -- Normalization, resolution, and relativization --
2065 
2066     // RFC2396 5.2 (6)
2067     private static String resolvePath(String base, String child,
2068                                       boolean absolute)
2069     {
2070         int i = base.lastIndexOf('/');
2071         int cn = child.length();
2072         String path = "";
2073 
2074         if (cn == 0) {
2075             // 5.2 (6a)
2076             if (i >= 0)
2077                 path = base.substring(0, i + 1);
2078         } else {
2079             StringBuilder sb = new StringBuilder(base.length() + cn);
2080             // 5.2 (6a)
2081             if (i >= 0)
2082                 sb.append(base, 0, i + 1);
2083             // 5.2 (6b)
2084             sb.append(child);
2085             path = sb.toString();
2086         }
2087 
2088         // 5.2 (6c-f)
2089         String np = normalize(path);
2090 
2091         // 5.2 (6g): If the result is absolute but the path begins with "../",
2092         // then we simply leave the path as-is
2093 
2094         return np;
2095     }
2096 
2097     // RFC2396 5.2
2098     private static URI resolve(URI base, URI child) {
2099         // check if child if opaque first so that NPE is thrown
2100         // if child is null.
2101         if (child.isOpaque() || base.isOpaque())
2102             return child;
2103 
2104         // 5.2 (2): Reference to current document (lone fragment)
2105         if ((child.scheme == null) && (child.authority == null)
2106             && child.path.isEmpty() && (child.fragment != null)
2107             && (child.query == null)) {
2108             if ((base.fragment != null)
2109                 && child.fragment.equals(base.fragment)) {
2110                 return base;
2111             }
2112             URI ru = new URI();
2113             ru.scheme = base.scheme;
2114             ru.authority = base.authority;
2115             ru.userInfo = base.userInfo;
2116             ru.host = base.host;
2117             ru.port = base.port;
2118             ru.path = base.path;
2119             ru.fragment = child.fragment;
2120             ru.query = base.query;
2121             return ru;
2122         }
2123 
2124         // 5.2 (3): Child is absolute
2125         if (child.scheme != null)
2126             return child;
2127 
2128         URI ru = new URI();             // Resolved URI
2129         ru.scheme = base.scheme;
2130         ru.query = child.query;
2131         ru.fragment = child.fragment;
2132 
2133         // 5.2 (4): Authority
2134         if (child.authority == null) {
2135             ru.authority = base.authority;
2136             ru.host = base.host;
2137             ru.userInfo = base.userInfo;
2138             ru.port = base.port;
2139 
2140             String cp = (child.path == null) ? "" : child.path;
2141             if ((cp.length() > 0) && (cp.charAt(0) == '/')) {
2142                 // 5.2 (5): Child path is absolute
2143                 ru.path = child.path;
2144             } else {
2145                 // 5.2 (6): Resolve relative path
2146                 ru.path = resolvePath(base.path, cp, base.isAbsolute());
2147             }
2148         } else {
2149             ru.authority = child.authority;
2150             ru.host = child.host;
2151             ru.userInfo = child.userInfo;
2152             ru.host = child.host;
2153             ru.port = child.port;
2154             ru.path = child.path;
2155         }
2156 
2157         // 5.2 (7): Recombine (nothing to do here)
2158         return ru;
2159     }
2160 
2161     // If the given URI's path is normal then return the URI;
2162     // o.w., return a new URI containing the normalized path.
2163     //
2164     private static URI normalize(URI u) {
2165         if (u.isOpaque() || (u.path == null) || (u.path.length() == 0))
2166             return u;
2167 
2168         String np = normalize(u.path);
2169         if (np == u.path)
2170             return u;
2171 
2172         URI v = new URI();
2173         v.scheme = u.scheme;
2174         v.fragment = u.fragment;
2175         v.authority = u.authority;
2176         v.userInfo = u.userInfo;
2177         v.host = u.host;
2178         v.port = u.port;
2179         v.path = np;
2180         v.query = u.query;
2181         return v;
2182     }
2183 
2184     // If both URIs are hierarchical, their scheme and authority components are
2185     // identical, and the base path is a prefix of the child's path, then
2186     // return a relative URI that, when resolved against the base, yields the
2187     // child; otherwise, return the child.
2188     //
2189     private static URI relativize(URI base, URI child) {
2190         // check if child if opaque first so that NPE is thrown
2191         // if child is null.
2192         if (child.isOpaque() || base.isOpaque())
2193             return child;
2194         if (!equalIgnoringCase(base.scheme, child.scheme)
2195             || !equal(base.authority, child.authority))
2196             return child;
2197 
2198         String bp = normalize(base.path);
2199         String cp = normalize(child.path);
2200         if (!bp.equals(cp)) {
2201             if (!bp.endsWith("/"))
2202                 bp = bp + "/";
2203             if (!cp.startsWith(bp))
2204                 return child;
2205         }
2206 
2207         URI v = new URI();
2208         v.path = cp.substring(bp.length());
2209         v.query = child.query;
2210         v.fragment = child.fragment;
2211         return v;
2212     }
2213 
2214 
2215 
2216     // -- Path normalization --
2217 
2218     // The following algorithm for path normalization avoids the creation of a
2219     // string object for each segment, as well as the use of a string buffer to
2220     // compute the final result, by using a single char array and editing it in
2221     // place.  The array is first split into segments, replacing each slash
2222     // with '\0' and creating a segment-index array, each element of which is
2223     // the index of the first char in the corresponding segment.  We then walk
2224     // through both arrays, removing ".", "..", and other segments as necessary
2225     // by setting their entries in the index array to -1.  Finally, the two
2226     // arrays are used to rejoin the segments and compute the final result.
2227     //
2228     // This code is based upon src/solaris/native/java/io/canonicalize_md.c
2229 
2230 
2231     // Check the given path to see if it might need normalization.  A path
2232     // might need normalization if it contains duplicate slashes, a "."
2233     // segment, or a ".." segment.  Return -1 if no further normalization is
2234     // possible, otherwise return the number of segments found.
2235     //
2236     // This method takes a string argument rather than a char array so that
2237     // this test can be performed without invoking path.toCharArray().
2238     //
2239     private static int needsNormalization(String path) {
2240         boolean normal = true;
2241         int ns = 0;                     // Number of segments
2242         int end = path.length() - 1;    // Index of last char in path
2243         int p = 0;                      // Index of next char in path
2244 
2245         // Skip initial slashes
2246         while (p <= end) {
2247             if (path.charAt(p) != '/') break;
2248             p++;
2249         }
2250         if (p > 1) normal = false;
2251 
2252         // Scan segments
2253         while (p <= end) {
2254 
2255             // Looking at "." or ".." ?
2256             if ((path.charAt(p) == '.')
2257                 && ((p == end)
2258                     || ((path.charAt(p + 1) == '/')
2259                         || ((path.charAt(p + 1) == '.')
2260                             && ((p + 1 == end)
2261                                 || (path.charAt(p + 2) == '/')))))) {
2262                 normal = false;
2263             }
2264             ns++;
2265 
2266             // Find beginning of next segment
2267             while (p <= end) {
2268                 if (path.charAt(p++) != '/')
2269                     continue;
2270 
2271                 // Skip redundant slashes
2272                 while (p <= end) {
2273                     if (path.charAt(p) != '/') break;
2274                     normal = false;
2275                     p++;
2276                 }
2277 
2278                 break;
2279             }
2280         }
2281 
2282         return normal ? -1 : ns;
2283     }
2284 
2285 
2286     // Split the given path into segments, replacing slashes with nulls and
2287     // filling in the given segment-index array.
2288     //
2289     // Preconditions:
2290     //   segs.length == Number of segments in path
2291     //
2292     // Postconditions:
2293     //   All slashes in path replaced by '\0'
2294     //   segs[i] == Index of first char in segment i (0 <= i < segs.length)
2295     //
2296     private static void split(char[] path, int[] segs) {
2297         int end = path.length - 1;      // Index of last char in path
2298         int p = 0;                      // Index of next char in path
2299         int i = 0;                      // Index of current segment
2300 
2301         // Skip initial slashes
2302         while (p <= end) {
2303             if (path[p] != '/') break;
2304             path[p] = '\0';
2305             p++;
2306         }
2307 
2308         while (p <= end) {
2309 
2310             // Note start of segment
2311             segs[i++] = p++;
2312 
2313             // Find beginning of next segment
2314             while (p <= end) {
2315                 if (path[p++] != '/')
2316                     continue;
2317                 path[p - 1] = '\0';
2318 
2319                 // Skip redundant slashes
2320                 while (p <= end) {
2321                     if (path[p] != '/') break;
2322                     path[p++] = '\0';
2323                 }
2324                 break;
2325             }
2326         }
2327 
2328         if (i != segs.length)
2329             throw new InternalError();  // ASSERT
2330     }
2331 
2332 
2333     // Join the segments in the given path according to the given segment-index
2334     // array, ignoring those segments whose index entries have been set to -1,
2335     // and inserting slashes as needed.  Return the length of the resulting
2336     // path.
2337     //
2338     // Preconditions:
2339     //   segs[i] == -1 implies segment i is to be ignored
2340     //   path computed by split, as above, with '\0' having replaced '/'
2341     //
2342     // Postconditions:
    //   path[0] .. path[return value - 1] == Resulting path
2344     //
2345     private static int join(char[] path, int[] segs) {
2346         int ns = segs.length;           // Number of segments
2347         int end = path.length - 1;      // Index of last char in path
2348         int p = 0;                      // Index of next path char to write
2349 
2350         if (path[p] == '\0') {
2351             // Restore initial slash for absolute paths
2352             path[p++] = '/';
2353         }
2354 
2355         for (int i = 0; i < ns; i++) {
2356             int q = segs[i];            // Current segment
2357             if (q == -1)
2358                 // Ignore this segment
2359                 continue;
2360 
2361             if (p == q) {
2362                 // We're already at this segment, so just skip to its end
2363                 while ((p <= end) && (path[p] != '\0'))
2364                     p++;
2365                 if (p <= end) {
2366                     // Preserve trailing slash
2367                     path[p++] = '/';
2368                 }
2369             } else if (p < q) {
2370                 // Copy q down to p
2371                 while ((q <= end) && (path[q] != '\0'))
2372                     path[p++] = path[q++];
2373                 if (q <= end) {
2374                     // Preserve trailing slash
2375                     path[p++] = '/';
2376                 }
2377             } else
2378                 throw new InternalError(); // ASSERT false
2379         }
2380 
2381         return p;
2382     }
2383 
2384 
2385     // Remove "." segments from the given path, and remove segment pairs
2386     // consisting of a non-".." segment followed by a ".." segment.
2387     //
2388     private static void removeDots(char[] path, int[] segs) {
2389         int ns = segs.length;
2390         int end = path.length - 1;
2391 
2392         for (int i = 0; i < ns; i++) {
2393             int dots = 0;               // Number of dots found (0, 1, or 2)
2394 
2395             // Find next occurrence of "." or ".."
2396             do {
2397                 int p = segs[i];
2398                 if (path[p] == '.') {
2399                     if (p == end) {
2400                         dots = 1;
2401                         break;
2402                     } else if (path[p + 1] == '\0') {
2403                         dots = 1;
2404                         break;
2405                     } else if ((path[p + 1] == '.')
2406                                && ((p + 1 == end)
2407                                    || (path[p + 2] == '\0'))) {
2408                         dots = 2;
2409                         break;
2410                     }
2411                 }
2412                 i++;
2413             } while (i < ns);
2414             if ((i > ns) || (dots == 0))
2415                 break;
2416 
2417             if (dots == 1) {
2418                 // Remove this occurrence of "."
2419                 segs[i] = -1;
2420             } else {
2421                 // If there is a preceding non-".." segment, remove both that
2422                 // segment and this occurrence of ".."; otherwise, leave this
2423                 // ".." segment as-is.
2424                 int j;
2425                 for (j = i - 1; j >= 0; j--) {
2426                     if (segs[j] != -1) break;
2427                 }
2428                 if (j >= 0) {
2429                     int q = segs[j];
2430                     if (!((path[q] == '.')
2431                           && (path[q + 1] == '.')
2432                           && (path[q + 2] == '\0'))) {
2433                         segs[i] = -1;
2434                         segs[j] = -1;
2435                     }
2436                 }
2437             }
2438         }
2439     }
2440 
2441 
2442     // DEVIATION: If the normalized path is relative, and if the first
2443     // segment could be parsed as a scheme name, then prepend a "." segment
2444     //
2445     private static void maybeAddLeadingDot(char[] path, int[] segs) {
2446 
2447         if (path[0] == '\0')
2448             // The path is absolute
2449             return;
2450 
2451         int ns = segs.length;
2452         int f = 0;                      // Index of first segment
2453         while (f < ns) {
2454             if (segs[f] >= 0)
2455                 break;
2456             f++;
2457         }
2458         if ((f >= ns) || (f == 0))
2459             // The path is empty, or else the original first segment survived,
2460             // in which case we already know that no leading "." is needed
2461             return;
2462 
2463         int p = segs[f];
2464         while ((p < path.length) && (path[p] != ':') && (path[p] != '\0')) p++;
2465         if (p >= path.length || path[p] == '\0')
2466             // No colon in first segment, so no "." needed
2467             return;
2468 
2469         // At this point we know that the first segment is unused,
2470         // hence we can insert a "." segment at that position
2471         path[0] = '.';
2472         path[1] = '\0';
2473         segs[0] = 0;
2474     }
2475 
2476 
2477     // Normalize the given path string.  A normal path string has no empty
2478     // segments (i.e., occurrences of "//"), no segments equal to ".", and no
2479     // segments equal to ".." that are preceded by a segment not equal to "..".
2480     // In contrast to Unix-style pathname normalization, for URI paths we
2481     // always retain trailing slashes.
2482     //
2483     private static String normalize(String ps) {
2484 
2485         // Does this path need normalization?
2486         int ns = needsNormalization(ps);        // Number of segments
2487         if (ns < 0)
2488             // Nope -- just return it
2489             return ps;
2490 
2491         char[] path = ps.toCharArray();         // Path in char-array form
2492 
2493         // Split path into segments
2494         int[] segs = new int[ns];               // Segment-index array
2495         split(path, segs);
2496 
2497         // Remove dots
2498         removeDots(path, segs);
2499 
2500         // Prevent scheme-name confusion
2501         maybeAddLeadingDot(path, segs);
2502 
2503         // Join the remaining segments and return the result
2504         String s = new String(path, 0, join(path, segs));
2505         if (s.equals(ps)) {
2506             // string was already normalized
2507             return ps;
2508         }
2509         return s;
2510     }
2511 
2512 
2513 
2514     // -- Character classes for parsing --
2515 
2516     // RFC2396 precisely specifies which characters in the US-ASCII charset are
2517     // permissible in the various components of a URI reference.  We here
2518     // define a set of mask pairs to aid in enforcing these restrictions.  Each
2519     // mask pair consists of two longs, a low mask and a high mask.  Taken
2520     // together they represent a 128-bit mask, where bit i is set iff the
2521     // character with value i is permitted.
2522     //
2523     // This approach is more efficient than sequentially searching arrays of
2524     // permitted characters.  It could be made still more efficient by
2525     // precompiling the mask information so that a character's presence in a
2526     // given mask could be determined by a single table lookup.
2527 
2528     // Compute the low-order mask for the characters in the given string
2529     private static long lowMask(String chars) {
2530         int n = chars.length();
2531         long m = 0;
2532         for (int i = 0; i < n; i++) {
2533             char c = chars.charAt(i);
2534             if (c < 64)
2535                 m |= (1L << c);
2536         }
2537         return m;
2538     }
2539 
2540     // Compute the high-order mask for the characters in the given string
2541     private static long highMask(String chars) {
2542         int n = chars.length();
2543         long m = 0;
2544         for (int i = 0; i < n; i++) {
2545             char c = chars.charAt(i);
2546             if ((c >= 64) && (c < 128))
2547                 m |= (1L << (c - 64));
2548         }
2549         return m;
2550     }
2551 
2552     // Compute a low-order mask for the characters
2553     // between first and last, inclusive
2554     private static long lowMask(char first, char last) {
2555         long m = 0;
2556         int f = Math.max(Math.min(first, 63), 0);
2557         int l = Math.max(Math.min(last, 63), 0);
2558         for (int i = f; i <= l; i++)
2559             m |= 1L << i;
2560         return m;
2561     }
2562 
2563     // Compute a high-order mask for the characters
2564     // between first and last, inclusive
2565     private static long highMask(char first, char last) {
2566         long m = 0;
2567         int f = Math.max(Math.min(first, 127), 64) - 64;
2568         int l = Math.max(Math.min(last, 127), 64) - 64;
2569         for (int i = f; i <= l; i++)
2570             m |= 1L << i;
2571         return m;
2572     }
2573 
2574     // Tell whether the given character is permitted by the given mask pair
2575     private static boolean match(char c, long lowMask, long highMask) {
2576         if (c == 0) // 0 doesn't have a slot in the mask. So, it never matches.
2577             return false;
2578         if (c < 64)
2579             return ((1L << c) & lowMask) != 0;
2580         if (c < 128)
2581             return ((1L << (c - 64)) & highMask) != 0;
2582         return false;
2583     }
2584 
2585     // Character-class masks, in reverse order from RFC2396 because
2586     // initializers for static fields cannot make forward references.
2587 
2588     // digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
2589     //            "8" | "9"
    // Digits all lie below 64, so the high word is empty
    private static final long L_DIGIT = lowMask('0', '9');
    private static final long H_DIGIT = 0L;

    // upalpha  = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
    //            "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
    //            "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
    // Letters all lie at or above 64, so the low word is empty
    private static final long L_UPALPHA = 0L;
    private static final long H_UPALPHA = highMask('A', 'Z');

    // lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |
    //            "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |
    //            "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
    private static final long L_LOWALPHA = 0L;
    private static final long H_LOWALPHA = highMask('a', 'z');

    // alpha         = lowalpha | upalpha
    private static final long L_ALPHA = L_LOWALPHA | L_UPALPHA;
    private static final long H_ALPHA = H_LOWALPHA | H_UPALPHA;

    // alphanum      = alpha | digit
    private static final long L_ALPHANUM = L_DIGIT | L_ALPHA;
    private static final long H_ALPHANUM = H_DIGIT | H_ALPHA;

    // hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
    //                         "a" | "b" | "c" | "d" | "e" | "f"
    private static final long L_HEX = L_DIGIT;
    private static final long H_HEX = highMask('A', 'F') | highMask('a', 'f');

    // mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
    //                 "(" | ")"
    // The same string feeds both words; each helper keeps only the
    // characters that fall in its own half of the 128-bit mask
    private static final long L_MARK = lowMask("-_.!~*'()");
    private static final long H_MARK = highMask("-_.!~*'()");

    // unreserved    = alphanum | mark
    private static final long L_UNRESERVED = L_ALPHANUM | L_MARK;
    private static final long H_UNRESERVED = H_ALPHANUM | H_MARK;

    // reserved      = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
    //                 "$" | "," | "[" | "]"
    // Added per RFC2732: "[", "]"
    private static final long L_RESERVED = lowMask(";/?:@&=+$,[]");
    private static final long H_RESERVED = highMask(";/?:@&=+$,[]");

    // The zero'th bit is used to indicate that escape pairs and non-US-ASCII
    // characters are allowed; this is handled by the scanEscape method below.
    // (match() never reports character 0 as permitted, so bit 0 is free to
    // serve as this flag.)
    private static final long L_ESCAPED = 1L;
    private static final long H_ESCAPED = 0L;

    // uric          = reserved | unreserved | escaped
    private static final long L_URIC = L_RESERVED | L_UNRESERVED | L_ESCAPED;
    private static final long H_URIC = H_RESERVED | H_UNRESERVED | H_ESCAPED;

    // pchar         = unreserved | escaped |
    //                 ":" | "@" | "&" | "=" | "+" | "$" | ","
    private static final long L_PCHAR
        = L_UNRESERVED | L_ESCAPED | lowMask(":@&=+$,");
    private static final long H_PCHAR
        = H_UNRESERVED | H_ESCAPED | highMask(":@&=+$,");

    // All valid path characters
    private static final long L_PATH = L_PCHAR | lowMask(";/");
    private static final long H_PATH = H_PCHAR | highMask(";/");

    // Dash, for use in domainlabel and toplabel
    private static final long L_DASH = lowMask("-");
    private static final long H_DASH = highMask("-");

    // Dot, for use in hostnames
    private static final long L_DOT = lowMask(".");
    private static final long H_DOT = highMask(".");

    // userinfo      = *( unreserved | escaped |
    //                    ";" | ":" | "&" | "=" | "+" | "$" | "," )
    private static final long L_USERINFO
        = L_UNRESERVED | L_ESCAPED | lowMask(";:&=+$,");
    private static final long H_USERINFO
        = H_UNRESERVED | H_ESCAPED | highMask(";:&=+$,");

    // reg_name      = 1*( unreserved | escaped | "$" | "," |
    //                     ";" | ":" | "@" | "&" | "=" | "+" )
    private static final long L_REG_NAME
        = L_UNRESERVED | L_ESCAPED | lowMask("$,;:@&=+");
    private static final long H_REG_NAME
        = H_UNRESERVED | H_ESCAPED | highMask("$,;:@&=+");

    // All valid characters for server-based authorities
    private static final long L_SERVER
        = L_USERINFO | L_ALPHANUM | L_DASH | lowMask(".:@[]");
    private static final long H_SERVER
        = H_USERINFO | H_ALPHANUM | H_DASH | highMask(".:@[]");

    // Special case of server authority that represents an IPv6 address
    // In this case, a % does not signify an escape sequence
    private static final long L_SERVER_PERCENT
        = L_SERVER | lowMask("%");
    private static final long H_SERVER_PERCENT
        = H_SERVER | highMask("%");
    // Mask pair matching only "["
    private static final long L_LEFT_BRACKET = lowMask("[");
    private static final long H_LEFT_BRACKET = highMask("[");

    // scheme        = alpha *( alpha | digit | "+" | "-" | "." )
    // This mask covers every character allowed anywhere in a scheme; the
    // leading-alpha requirement of the grammar is not expressed by the mask
    private static final long L_SCHEME = L_ALPHA | L_DIGIT | lowMask("+-.");
    private static final long H_SCHEME = H_ALPHA | H_DIGIT | highMask("+-.");

    // scope_id = alpha | digit | "_" | "."
    private static final long L_SCOPE_ID
        = L_ALPHANUM | lowMask("_.");
    private static final long H_SCOPE_ID
        = H_ALPHANUM | highMask("_.");
2699 
2700     // -- Escaping and encoding --
2701 
2702     private static final char[] hexDigits = {
2703         '0', '1', '2', '3', '4', '5', '6', '7',
2704         '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
2705     };
2706 
2707     private static void appendEscape(StringBuilder sb, byte b) {
2708         sb.append('%');
2709         sb.append(hexDigits[(b >> 4) & 0x0f]);
2710         sb.append(hexDigits[(b >> 0) & 0x0f]);
2711     }
2712 
2713     private static void appendEncoded(StringBuilder sb, char c) {
2714         ByteBuffer bb = null;
2715         try {
2716             bb = ThreadLocalCoders.encoderFor("UTF-8")
2717                 .encode(CharBuffer.wrap("" + c));
2718         } catch (CharacterCodingException x) {
2719             assert false;
2720         }
2721         while (bb.hasRemaining()) {
2722             int b = bb.get() & 0xff;
2723             if (b >= 0x80)
2724                 appendEscape(sb, (byte)b);
2725             else
2726                 sb.append((char)b);
2727         }
2728     }
2729 
2730     // Quote any characters in s that are not permitted
2731     // by the given mask pair
2732     //
2733     private static String quote(String s, long lowMask, long highMask) {
2734         StringBuilder sb = null;
2735         boolean allowNonASCII = ((lowMask & L_ESCAPED) != 0);
2736         for (int i = 0; i < s.length(); i++) {
2737             char c = s.charAt(i);
2738             if (c < '\u0080') {
2739                 if (!match(c, lowMask, highMask)) {
2740                     if (sb == null) {
2741                         sb = new StringBuilder();
2742                         sb.append(s, 0, i);
2743                     }
2744                     appendEscape(sb, (byte)c);
2745                 } else {
2746                     if (sb != null)
2747                         sb.append(c);
2748                 }
2749             } else if (allowNonASCII
2750                        && (Character.isSpaceChar(c)
2751                            || Character.isISOControl(c))) {
2752                 if (sb == null) {
2753                     sb = new StringBuilder();
2754                     sb.append(s, 0, i);
2755                 }
2756                 appendEncoded(sb, c);
2757             } else {
2758                 if (sb != null)
2759                     sb.append(c);
2760             }
2761         }
2762         return (sb == null) ? s : sb.toString();
2763     }
2764 
2765     // Encodes all characters >= \u0080 into escaped, normalized UTF-8 octets,
2766     // assuming that s is otherwise legal
2767     //
2768     private static String encode(String s) {
2769         int n = s.length();
2770         if (n == 0)
2771             return s;
2772 
2773         // First check whether we actually need to encode
2774         for (int i = 0;;) {
2775             if (s.charAt(i) >= '\u0080')
2776                 break;
2777             if (++i >= n)
2778                 return s;
2779         }
2780 
2781         String ns = Normalizer.normalize(s, Normalizer.Form.NFC);
2782         ByteBuffer bb = null;
2783         try {
2784             bb = ThreadLocalCoders.encoderFor("UTF-8")
2785                 .encode(CharBuffer.wrap(ns));
2786         } catch (CharacterCodingException x) {
2787             assert false;
2788         }
2789 
2790         StringBuilder sb = new StringBuilder();
2791         while (bb.hasRemaining()) {
2792             int b = bb.get() & 0xff;
2793             if (b >= 0x80)
2794                 appendEscape(sb, (byte)b);
2795             else
2796                 sb.append((char)b);
2797         }
2798         return sb.toString();
2799     }
2800 
2801     private static int decode(char c) {
2802         if ((c >= '0') && (c <= '9'))
2803             return c - '0';
2804         if ((c >= 'a') && (c <= 'f'))
2805             return c - 'a' + 10;
2806         if ((c >= 'A') && (c <= 'F'))
2807             return c - 'A' + 10;
2808         assert false;
2809         return -1;
2810     }
2811 
2812     private static byte decode(char c1, char c2) {
2813         return (byte)(  ((decode(c1) & 0xf) << 4)
2814                       | ((decode(c2) & 0xf) << 0));
2815     }
2816 
2817     // Evaluates all escapes in s, applying UTF-8 decoding if needed.  Assumes
2818     // that escapes are well-formed syntactically, i.e., of the form %XX.  If a
2819     // sequence of escaped octets is not valid UTF-8 then the erroneous octets
2820     // are replaced with '\uFFFD'.
2821     // Exception: any "%" found between "[]" is left alone. It is an IPv6 literal
2822     //            with a scope_id
2823     //
    private static String decode(String s) {
        // Delegate with ignorePercentInBrackets = true, so that any '%'
        // between '[' and ']' is copied through instead of being decoded
        return decode(s, true);
    }
2827 
2828     // This method was introduced as a generalization of URI.decode method
2829     // to provide a fix for JDK-8037396
    private static String decode(String s, boolean ignorePercentInBrackets) {
        // Null, empty, and escape-free input is returned unchanged
        if (s == null)
            return s;
        int n = s.length();
        if (n == 0)
            return s;
        if (s.indexOf('%') < 0)
            return s;

        // Each buffer gets capacity n; every "%XX" triple contributes a
        // single byte to bb
        StringBuilder sb = new StringBuilder(n);
        ByteBuffer bb = ByteBuffer.allocate(n);
        CharBuffer cb = CharBuffer.allocate(n);
        // Malformed or unmappable octet sequences are replaced, not reported
        CharsetDecoder dec = ThreadLocalCoders.decoderFor("UTF-8")
                .onMalformedInput(CodingErrorAction.REPLACE)
                .onUnmappableCharacter(CodingErrorAction.REPLACE);

        // This is not horribly efficient, but it will do for now
        char c = s.charAt(0);
        boolean betweenBrackets = false;

        // i is advanced inside the loop body; c always mirrors s.charAt(i)
        for (int i = 0; i < n;) {
            assert c == s.charAt(i);    // Loop invariant
            // Track whether we are inside a "[...]" section
            if (c == '[') {
                betweenBrackets = true;
            } else if (betweenBrackets && c == ']') {
                betweenBrackets = false;
            }
            // Copy through anything that is not an escape, as well as any '%'
            // inside brackets when the caller asked for those to be ignored
            if (c != '%' || (betweenBrackets && ignorePercentInBrackets)) {
                sb.append(c);
                if (++i >= n)
                    break;
                c = s.charAt(i);
                continue;
            }
            // Gather the whole run of consecutive "%XX" escapes into bb so
            // that it is decoded as one octet sequence
            bb.clear();
            int ui = i;
            for (;;) {
                assert (n - i >= 2);
                // The two ++i advance past '%' onto each hex digit in turn
                bb.put(decode(s.charAt(++i), s.charAt(++i)));
                if (++i >= n)
                    break;
                c = s.charAt(i);
                if (c != '%')
                    break;
            }
            // Decode the collected octets and append the resulting chars
            bb.flip();
            cb.clear();
            dec.reset();
            CoderResult cr = dec.decode(bb, cb, true);
            assert cr.isUnderflow();
            cr = dec.flush(cb);
            assert cr.isUnderflow();
            sb.append(cb.flip().toString());
        }

        return sb.toString();
    }
2887 
2888 
2889     // -- Parsing --
2890 
2891     // For convenience we wrap the input URI string in a new instance of the
2892     // following internal class.  This saves always having to pass the input
2893     // string as an argument to each internal scan/parse method.
2894 
2895     private class Parser {
2896 
        private String input;           // URI input string
        // NOTE(review): nothing in the visible part of this class reads or
        // writes this flag; presumably it makes parsing insist on a
        // server-based authority -- confirm against its users.
        private boolean requireServerAuthority = false;

        // Remember the string to be parsed.  The same string is also stored
        // in `string`, which is not declared in the visible part of this
        // class (presumably a field of the enclosing URI -- confirm).
        Parser(String s) {
            input = s;
            string = s;
        }
2904 
2905         // -- Methods for throwing URISyntaxException in various ways --
2906 
        // Always throws: a URISyntaxException carrying the input string and
        // the given reason, with no error position
        private void fail(String reason) throws URISyntaxException {
            throw new URISyntaxException(input, reason);
        }
2910 
        // Always throws: a URISyntaxException carrying the input string, the
        // given reason, and p as the error position
        private void fail(String reason, int p) throws URISyntaxException {
            throw new URISyntaxException(input, reason, p);
        }
2914 
        // Always throws: fails at position p with the reason
        // "Expected " followed by the given description
        private void failExpecting(String expected, int p)
            throws URISyntaxException
        {
            fail("Expected " + expected, p);
        }
2920 
2921 
2922         // -- Simple access to the input string --
2923 
2924         // Tells whether start < end and, if so, whether charAt(start) == c
2925         //
2926         private boolean at(int start, int end, char c) {
2927             return (start < end) && (input.charAt(start) == c);
2928         }
2929 
        // Tells whether start + s.length() <= end and, if so,
2931         // whether the chars at the start position match s exactly
2932         //
2933         private boolean at(int start, int end, String s) {
2934             int p = start;
2935             int sn = s.length();
2936             if (sn > end - p)
2937                 return false;
2938             int i = 0;
2939             while (i < sn) {
2940                 if (input.charAt(p++) != s.charAt(i)) {
2941                     break;
2942                 }
2943                 i++;
2944             }
2945             return (i == sn);
2946         }
2947 
2948 
2949         // -- Scanning --
2950 
2951         // The various scan and parse methods that follow use a uniform
2952         // convention of taking the current start position and end index as
2953         // their first two arguments.  The start is inclusive while the end is
2954         // exclusive, just as in the String class, i.e., a start/end pair
        // denotes the half-open interval [start, end) of the input string.
2956         //
2957         // These methods never proceed past the end position.  They may return
2958         // -1 to indicate outright failure, but more often they simply return
2959         // the position of the first char after the last char scanned.  Thus
2960         // a typical idiom is
2961         //
2962         //     int p = start;
2963         //     int q = scan(p, end, ...);
2964         //     if (q > p)
2965         //         // We scanned something
2966         //         ...;
2967         //     else if (q == p)
2968         //         // We scanned nothing
2969         //         ...;
2970         //     else if (q == -1)
2971         //         // Something went wrong
2972         //         ...;
2973 
2974 
2975         // Scan a specific char: If the char at the given start position is
2976         // equal to c, return the index of the next char; otherwise, return the
2977         // start position.
2978         //
2979         private int scan(int start, int end, char c) {
2980             if ((start < end) && (input.charAt(start) == c))
2981                 return start + 1;
2982             return start;
2983         }
2984 
2985         // Scan forward from the given start position.  Stop at the first char
2986         // in the err string (in which case -1 is returned), or the first char
2987         // in the stop string (in which case the index of the preceding char is
2988         // returned), or the end of the input string (in which case the length
2989         // of the input string is returned).  May return the start position if
2990         // nothing matches.
2991         //
2992         private int scan(int start, int end, String err, String stop) {
2993             int p = start;
2994             while (p < end) {
2995                 char c = input.charAt(p);
2996                 if (err.indexOf(c) >= 0)
2997                     return -1;
2998                 if (stop.indexOf(c) >= 0)
2999                     break;
3000                 p++;
3001             }
3002             return p;
3003         }
3004 
3005         // Scan forward from the given start position.  Stop at the first char
3006         // in the stop string (in which case the index of the preceding char is
3007         // returned), or the end of the input string (in which case the length
3008         // of the input string is returned).  May return the start position if
3009         // nothing matches.
3010         //
3011         private int scan(int start, int end, String stop) {
3012             int p = start;
3013             while (p < end) {
3014                 char c = input.charAt(p);
3015                 if (stop.indexOf(c) >= 0)
3016                     break;
3017                 p++;
3018             }
3019             return p;
3020         }
3021 
3022         // Scan a potential escape sequence, starting at the given position,
3023         // with the given first char (i.e., charAt(start) == c).
3024         //
3025         // This method assumes that if escapes are allowed then visible
3026         // non-US-ASCII chars are also allowed.
3027         //
3028         private int scanEscape(int start, int n, char first)
3029             throws URISyntaxException
3030         {
3031             int p = start;
3032             char c = first;
3033             if (c == '%') {
3034                 // Process escape pair
3035                 if ((p + 3 <= n)
3036                     && match(input.charAt(p + 1), L_HEX, H_HEX)
3037                     && match(input.charAt(p + 2), L_HEX, H_HEX)) {
3038                     return p + 3;
3039                 }
3040                 fail("Malformed escape pair", p);
3041             } else if ((c > 128)
3042                        && !Character.isSpaceChar(c)
3043                        && !Character.isISOControl(c)) {
3044                 // Allow unescaped but visible non-US-ASCII chars
3045                 return p + 1;
3046             }
3047             return p;
3048         }
3049 
3050         // Scan chars that match the given mask pair
3051         //
3052         private int scan(int start, int n, long lowMask, long highMask)
3053             throws URISyntaxException
3054         {
3055             int p = start;
3056             while (p < n) {
3057                 char c = input.charAt(p);
3058                 if (match(c, lowMask, highMask)) {
3059                     p++;
3060                     continue;
3061                 }
3062                 if ((lowMask & L_ESCAPED) != 0) {
3063                     int q = scanEscape(p, n, c);
3064                     if (q > p) {
3065                         p = q;
3066                         continue;
3067                     }
3068                 }
3069                 break;
3070             }
3071             return p;
3072         }
3073 
3074         // Check that each of the chars in [start, end) matches the given mask
3075         //
3076         private void checkChars(int start, int end,
3077                                 long lowMask, long highMask,
3078                                 String what)
3079             throws URISyntaxException
3080         {
3081             int p = scan(start, end, lowMask, highMask);
3082             if (p < end)
3083                 fail("Illegal character in " + what, p);
3084         }
3085 
3086         // Check that the char at position p matches the given mask
3087         //
3088         private void checkChar(int p,
3089                                long lowMask, long highMask,
3090                                String what)
3091             throws URISyntaxException
3092         {
3093             checkChars(p, p + 1, lowMask, highMask, what);
3094         }
3095 
3096 
3097         // -- Parsing --
3098 
3099         // [<scheme>:]<scheme-specific-part>[#<fragment>]
3100         //
3101         void parse(boolean rsa) throws URISyntaxException {
3102             requireServerAuthority = rsa;
3103             int n = input.length();
3104             int p = scan(0, n, "/?#", ":");
3105             if ((p >= 0) && at(p, n, ':')) {
3106                 if (p == 0)
3107                     failExpecting("scheme name", 0);
3108                 checkChar(0, L_ALPHA, H_ALPHA, "scheme name");
3109                 checkChars(1, p, L_SCHEME, H_SCHEME, "scheme name");
3110                 scheme = input.substring(0, p);
3111                 p++;                    // Skip ':'
3112                 if (at(p, n, '/')) {
3113                     p = parseHierarchical(p, n);
3114                 } else {
3115                     // opaque; need to create the schemeSpecificPart
3116                     int q = scan(p, n, "#");
3117                     if (q <= p)
3118                         failExpecting("scheme-specific part", p);
3119                     checkChars(p, q, L_URIC, H_URIC, "opaque part");
3120                     schemeSpecificPart = input.substring(p, q);
3121                     p = q;
3122                 }
3123             } else {
3124                 p = parseHierarchical(0, n);
3125             }
3126             if (at(p, n, '#')) {
3127                 checkChars(p + 1, n, L_URIC, H_URIC, "fragment");
3128                 fragment = input.substring(p + 1, n);
3129                 p = n;
3130             }
3131             if (p < n)
3132                 fail("end of URI", p);
3133         }
3134 
3135         // [//authority]<path>[?<query>]
3136         //
3137         // DEVIATION from RFC2396: We allow an empty authority component as
3138         // long as it's followed by a non-empty path, query component, or
3139         // fragment component.  This is so that URIs such as "file:///foo/bar"
3140         // will parse.  This seems to be the intent of RFC2396, though the
3141         // grammar does not permit it.  If the authority is empty then the
3142         // userInfo, host, and port components are undefined.
3143         //
3144         // DEVIATION from RFC2396: We allow empty relative paths.  This seems
3145         // to be the intent of RFC2396, but the grammar does not permit it.
3146         // The primary consequence of this deviation is that "#f" parses as a
3147         // relative URI with an empty path.
3148         //
3149         private int parseHierarchical(int start, int n)
3150             throws URISyntaxException
3151         {
3152             int p = start;
3153             if (at(p, n, '/') && at(p + 1, n, '/')) {
3154                 p += 2;
3155                 int q = scan(p, n, "/?#");
3156                 if (q > p) {
3157                     p = parseAuthority(p, q);
3158                 } else if (q < n) {
3159                     // DEVIATION: Allow empty authority prior to non-empty
3160                     // path, query component or fragment identifier
3161                 } else
3162                     failExpecting("authority", p);
3163             }
3164             int q = scan(p, n, "?#"); // DEVIATION: May be empty
3165             checkChars(p, q, L_PATH, H_PATH, "path");
3166             path = input.substring(p, q);
3167             p = q;
3168             if (at(p, n, '?')) {
3169                 p++;
3170                 q = scan(p, n, "#");
3171                 checkChars(p, q, L_URIC, H_URIC, "query");
3172                 query = input.substring(p, q);
3173                 p = q;
3174             }
3175             return p;
3176         }
3177 
3178         // authority     = server | reg_name
3179         //
3180         // Ambiguity: An authority that is a registry name rather than a server
3181         // might have a prefix that parses as a server.  We use the fact that
3182         // the authority component is always followed by '/' or the end of the
3183         // input string to resolve this: If the complete authority did not
3184         // parse as a server then we try to parse it as a registry name.
3185         //
3186         private int parseAuthority(int start, int n)
3187             throws URISyntaxException
3188         {
3189             int p = start;
3190             int q = p;
3191             URISyntaxException ex = null;
3192 
3193             boolean serverChars;
3194             boolean regChars;
3195 
3196             if (scan(p, n, "]") > p) {
3197                 // contains a literal IPv6 address, therefore % is allowed
3198                 serverChars = (scan(p, n, L_SERVER_PERCENT, H_SERVER_PERCENT) == n);
3199             } else {
3200                 serverChars = (scan(p, n, L_SERVER, H_SERVER) == n);
3201             }
3202             regChars = (scan(p, n, L_REG_NAME, H_REG_NAME) == n);
3203 
3204             if (regChars && !serverChars) {
3205                 // Must be a registry-based authority
3206                 authority = input.substring(p, n);
3207                 return n;
3208             }
3209 
3210             if (serverChars) {
3211                 // Might be (probably is) a server-based authority, so attempt
3212                 // to parse it as such.  If the attempt fails, try to treat it
3213                 // as a registry-based authority.
3214                 try {
3215                     q = parseServer(p, n);
3216                     if (q < n)
3217                         failExpecting("end of authority", q);
3218                     authority = input.substring(p, n);
3219                 } catch (URISyntaxException x) {
3220                     // Undo results of failed parse
3221                     userInfo = null;
3222                     host = null;
3223                     port = -1;
3224                     if (requireServerAuthority) {
3225                         // If we're insisting upon a server-based authority,
3226                         // then just re-throw the exception
3227                         throw x;
3228                     } else {
3229                         // Save the exception in case it doesn't parse as a
3230                         // registry either
3231                         ex = x;
3232                         q = p;
3233                     }
3234                 }
3235             }
3236 
3237             if (q < n) {
3238                 if (regChars) {
3239                     // Registry-based authority
3240                     authority = input.substring(p, n);
3241                 } else if (ex != null) {
3242                     // Re-throw exception; it was probably due to
3243                     // a malformed IPv6 address
3244                     throw ex;
3245                 } else {
3246                     fail("Illegal character in authority", q);
3247                 }
3248             }
3249 
3250             return n;
3251         }
3252 
3253 
3254         // [<userinfo>@]<host>[:<port>]
3255         //
3256         private int parseServer(int start, int n)
3257             throws URISyntaxException
3258         {
3259             int p = start;
3260             int q;
3261 
3262             // userinfo
3263             q = scan(p, n, "/?#", "@");
3264             if ((q >= p) && at(q, n, '@')) {
3265                 checkChars(p, q, L_USERINFO, H_USERINFO, "user info");
3266                 userInfo = input.substring(p, q);
3267                 p = q + 1;              // Skip '@'
3268             }
3269 
3270             // hostname, IPv4 address, or IPv6 address
3271             if (at(p, n, '[')) {
3272                 // DEVIATION from RFC2396: Support IPv6 addresses, per RFC2732
3273                 p++;
3274                 q = scan(p, n, "/?#", "]");
3275                 if ((q > p) && at(q, n, ']')) {
3276                     // look for a "%" scope id
3277                     int r = scan (p, q, "%");
3278                     if (r > p) {
3279                         parseIPv6Reference(p, r);
3280                         if (r+1 == q) {
3281                             fail ("scope id expected");
3282                         }
3283                         checkChars (r+1, q, L_SCOPE_ID, H_SCOPE_ID,
3284                                                 "scope id");
3285                     } else {
3286                         parseIPv6Reference(p, q);
3287                     }
3288                     host = input.substring(p-1, q+1);
3289                     p = q + 1;
3290                 } else {
3291                     failExpecting("closing bracket for IPv6 address", q);
3292                 }
3293             } else {
3294                 q = parseIPv4Address(p, n);
3295                 if (q <= p)
3296                     q = parseHostname(p, n);
3297                 p = q;
3298             }
3299 
3300             // port
3301             if (at(p, n, ':')) {
3302                 p++;
3303                 q = scan(p, n, "/");
3304                 if (q > p) {
3305                     checkChars(p, q, L_DIGIT, H_DIGIT, "port number");
3306                     try {
3307                         port = Integer.parseInt(input, p, q, 10);
3308                     } catch (NumberFormatException x) {
3309                         fail("Malformed port number", p);
3310                     }
3311                     p = q;
3312                 }
3313             }
3314             if (p < n)
3315                 failExpecting("port number", p);
3316 
3317             return p;
3318         }
3319 
3320         // Scan a string of decimal digits whose value fits in a byte
3321         //
3322         private int scanByte(int start, int n)
3323             throws URISyntaxException
3324         {
3325             int p = start;
3326             int q = scan(p, n, L_DIGIT, H_DIGIT);
3327             if (q <= p) return q;
3328             if (Integer.parseInt(input, p, q, 10) > 255) return p;
3329             return q;
3330         }
3331 
3332         // Scan an IPv4 address.
3333         //
3334         // If the strict argument is true then we require that the given
3335         // interval contain nothing besides an IPv4 address; if it is false
3336         // then we only require that it start with an IPv4 address.
3337         //
3338         // If the interval does not contain or start with (depending upon the
3339         // strict argument) a legal IPv4 address characters then we return -1
3340         // immediately; otherwise we insist that these characters parse as a
3341         // legal IPv4 address and throw an exception on failure.
3342         //
3343         // We assume that any string of decimal digits and dots must be an IPv4
3344         // address.  It won't parse as a hostname anyway, so making that
3345         // assumption here allows more meaningful exceptions to be thrown.
3346         //
3347         private int scanIPv4Address(int start, int n, boolean strict)
3348             throws URISyntaxException
3349         {
3350             int p = start;
3351             int q;
3352             int m = scan(p, n, L_DIGIT | L_DOT, H_DIGIT | H_DOT);
3353             if ((m <= p) || (strict && (m != n)))
3354                 return -1;
3355             for (;;) {
3356                 // Per RFC2732: At most three digits per byte
3357                 // Further constraint: Each element fits in a byte
3358                 if ((q = scanByte(p, m)) <= p) break;   p = q;
3359                 if ((q = scan(p, m, '.')) <= p) break;  p = q;
3360                 if ((q = scanByte(p, m)) <= p) break;   p = q;
3361                 if ((q = scan(p, m, '.')) <= p) break;  p = q;
3362                 if ((q = scanByte(p, m)) <= p) break;   p = q;
3363                 if ((q = scan(p, m, '.')) <= p) break;  p = q;
3364                 if ((q = scanByte(p, m)) <= p) break;   p = q;
3365                 if (q < m) break;
3366                 return q;
3367             }
3368             fail("Malformed IPv4 address", q);
3369             return -1;
3370         }
3371 
3372         // Take an IPv4 address: Throw an exception if the given interval
3373         // contains anything except an IPv4 address
3374         //
3375         private int takeIPv4Address(int start, int n, String expected)
3376             throws URISyntaxException
3377         {
3378             int p = scanIPv4Address(start, n, true);
3379             if (p <= start)
3380                 failExpecting(expected, start);
3381             return p;
3382         }
3383 
3384         // Attempt to parse an IPv4 address, returning -1 on failure but
3385         // allowing the given interval to contain [:<characters>] after
3386         // the IPv4 address.
3387         //
3388         private int parseIPv4Address(int start, int n) {
3389             int p;
3390 
3391             try {
3392                 p = scanIPv4Address(start, n, false);
3393             } catch (URISyntaxException x) {
3394                 return -1;
3395             } catch (NumberFormatException nfe) {
3396                 return -1;
3397             }
3398 
3399             if (p > start && p < n) {
3400                 // IPv4 address is followed by something - check that
3401                 // it's a ":" as this is the only valid character to
3402                 // follow an address.
3403                 if (input.charAt(p) != ':') {
3404                     p = -1;
3405                 }
3406             }
3407 
3408             if (p > start)
3409                 host = input.substring(start, p);
3410 
3411             return p;
3412         }
3413 
3414         // hostname      = domainlabel [ "." ] | 1*( domainlabel "." ) toplabel [ "." ]
3415         // domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
3416         // toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
3417         //
3418         private int parseHostname(int start, int n)
3419             throws URISyntaxException
3420         {
3421             int p = start;
3422             int q;
3423             int l = -1;                 // Start of last parsed label
3424 
3425             do {
3426                 // domainlabel = alphanum [ *( alphanum | "-" ) alphanum ]
3427                 q = scan(p, n, L_ALPHANUM, H_ALPHANUM);
3428                 if (q <= p)
3429                     break;
3430                 l = p;
3431                 if (q > p) {
3432                     p = q;
3433                     q = scan(p, n, L_ALPHANUM | L_DASH, H_ALPHANUM | H_DASH);
3434                     if (q > p) {
3435                         if (input.charAt(q - 1) == '-')
3436                             fail("Illegal character in hostname", q - 1);
3437                         p = q;
3438                     }
3439                 }
3440                 q = scan(p, n, '.');
3441                 if (q <= p)
3442                     break;
3443                 p = q;
3444             } while (p < n);
3445 
3446             if ((p < n) && !at(p, n, ':'))
3447                 fail("Illegal character in hostname", p);
3448 
3449             if (l < 0)
3450                 failExpecting("hostname", start);
3451 
3452             // for a fully qualified hostname check that the rightmost
3453             // label starts with an alpha character.
3454             if (l > start && !match(input.charAt(l), L_ALPHA, H_ALPHA)) {
3455                 fail("Illegal character in hostname", l);
3456             }
3457 
3458             host = input.substring(start, p);
3459             return p;
3460         }
3461 
3462 
3463         // IPv6 address parsing, from RFC2373: IPv6 Addressing Architecture
3464         //
3465         // Bug: The grammar in RFC2373 Appendix B does not allow addresses of
3466         // the form ::12.34.56.78, which are clearly shown in the examples
3467         // earlier in the document.  Here is the original grammar:
3468         //
3469         //   IPv6address = hexpart [ ":" IPv4address ]
3470         //   hexpart     = hexseq | hexseq "::" [ hexseq ] | "::" [ hexseq ]
3471         //   hexseq      = hex4 *( ":" hex4)
3472         //   hex4        = 1*4HEXDIG
3473         //
3474         // We therefore use the following revised grammar:
3475         //
3476         //   IPv6address = hexseq [ ":" IPv4address ]
3477         //                 | hexseq [ "::" [ hexpost ] ]
3478         //                 | "::" [ hexpost ]
3479         //   hexpost     = hexseq | hexseq ":" IPv4address | IPv4address
3480         //   hexseq      = hex4 *( ":" hex4)
3481         //   hex4        = 1*4HEXDIG
3482         //
3483         // This covers all and only the following cases:
3484         //
3485         //   hexseq
3486         //   hexseq : IPv4address
3487         //   hexseq ::
3488         //   hexseq :: hexseq
3489         //   hexseq :: hexseq : IPv4address
3490         //   hexseq :: IPv4address
3491         //   :: hexseq
3492         //   :: hexseq : IPv4address
3493         //   :: IPv4address
3494         //   ::
3495         //
3496         // Additionally we constrain the IPv6 address as follows :-
3497         //
3498         //  i.  IPv6 addresses without compressed zeros should contain
3499         //      exactly 16 bytes.
3500         //
3501         //  ii. IPv6 addresses with compressed zeros should contain
3502         //      less than 16 bytes.
3503 
3504         private int ipv6byteCount = 0;
3505 
3506         private int parseIPv6Reference(int start, int n)
3507             throws URISyntaxException
3508         {
3509             int p = start;
3510             int q;
3511             boolean compressedZeros = false;
3512 
3513             q = scanHexSeq(p, n);
3514 
3515             if (q > p) {
3516                 p = q;
3517                 if (at(p, n, "::")) {
3518                     compressedZeros = true;
3519                     p = scanHexPost(p + 2, n);
3520                 } else if (at(p, n, ':')) {
3521                     p = takeIPv4Address(p + 1,  n, "IPv4 address");
3522                     ipv6byteCount += 4;
3523                 }
3524             } else if (at(p, n, "::")) {
3525                 compressedZeros = true;
3526                 p = scanHexPost(p + 2, n);
3527             }
3528             if (p < n)
3529                 fail("Malformed IPv6 address", start);
3530             if (ipv6byteCount > 16)
3531                 fail("IPv6 address too long", start);
3532             if (!compressedZeros && ipv6byteCount < 16)
3533                 fail("IPv6 address too short", start);
3534             if (compressedZeros && ipv6byteCount == 16)
3535                 fail("Malformed IPv6 address", start);
3536 
3537             return p;
3538         }
3539 
3540         private int scanHexPost(int start, int n)
3541             throws URISyntaxException
3542         {
3543             int p = start;
3544             int q;
3545 
3546             if (p == n)
3547                 return p;
3548 
3549             q = scanHexSeq(p, n);
3550             if (q > p) {
3551                 p = q;
3552                 if (at(p, n, ':')) {
3553                     p++;
3554                     p = takeIPv4Address(p, n, "hex digits or IPv4 address");
3555                     ipv6byteCount += 4;
3556                 }
3557             } else {
3558                 p = takeIPv4Address(p, n, "hex digits or IPv4 address");
3559                 ipv6byteCount += 4;
3560             }
3561             return p;
3562         }
3563 
3564         // Scan a hex sequence; return -1 if one could not be scanned
3565         //
3566         private int scanHexSeq(int start, int n)
3567             throws URISyntaxException
3568         {
3569             int p = start;
3570             int q;
3571 
3572             q = scan(p, n, L_HEX, H_HEX);
3573             if (q <= p)
3574                 return -1;
3575             if (at(q, n, '.'))          // Beginning of IPv4 address
3576                 return -1;
3577             if (q > p + 4)
3578                 fail("IPv6 hexadecimal digit sequence too long", p);
3579             ipv6byteCount += 2;
3580             p = q;
3581             while (p < n) {
3582                 if (!at(p, n, ':'))
3583                     break;
3584                 if (at(p + 1, n, ':'))
3585                     break;              // "::"
3586                 p++;
3587                 q = scan(p, n, L_HEX, H_HEX);
3588                 if (q <= p)
3589                     failExpecting("digits for an IPv6 address", p);
3590                 if (at(q, n, '.')) {    // Beginning of IPv4 address
3591                     p--;
3592                     break;
3593                 }
3594                 if (q > p + 4)
3595                     fail("IPv6 hexadecimal digit sequence too long", p);
3596                 ipv6byteCount += 2;
3597                 p = q;
3598             }
3599 
3600             return p;
3601         }
3602 
3603     }
3604     static {
3605         SharedSecrets.setJavaNetUriAccess(
3606             new JavaNetUriAccess() {
3607                 public URI create(String scheme, String path) {
3608                     return new URI(scheme, path);
3609                 }
3610             }
3611         );
3612     }
3613 }