1 /*
   2  * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 package java.net;
  27 
  28 import java.io.IOException;
  29 import java.io.InvalidObjectException;
  30 import java.io.ObjectInputStream;
  31 import java.io.ObjectOutputStream;
  32 import java.io.Serializable;
  33 import java.nio.ByteBuffer;
  34 import java.nio.CharBuffer;
  35 import java.nio.charset.CharsetDecoder;
  36 import java.nio.charset.CoderResult;
  37 import java.nio.charset.CodingErrorAction;
  38 import java.nio.charset.CharacterCodingException;
  39 import java.text.Normalizer;
  40 import sun.nio.cs.ThreadLocalCoders;
  41 
  42 import java.lang.Character;             // for javadoc
  43 import java.lang.NullPointerException;  // for javadoc
  44 
  45 
  46 /**
  47  * Represents a Uniform Resource Identifier (URI) reference.
  48  *
  49  * <p> Aside from some minor deviations noted below, an instance of this
  50  * class represents a URI reference as defined by
  51  * <a href="http://www.ietf.org/rfc/rfc2396.txt"><i>RFC&nbsp;2396: Uniform
  52  * Resource Identifiers (URI): Generic Syntax</i></a>, amended by <a
  53  * href="http://www.ietf.org/rfc/rfc2732.txt"><i>RFC&nbsp;2732: Format for
  54  * Literal IPv6 Addresses in URLs</i></a>. The Literal IPv6 address format
  55  * also supports scope_ids. The syntax and usage of scope_ids is described
  56  * <a href="Inet6Address.html#scoped">here</a>.
  57  * This class provides constructors for creating URI instances from
  58  * their components or by parsing their string forms, methods for accessing the
  59  * various components of an instance, and methods for normalizing, resolving,
  60  * and relativizing URI instances.  Instances of this class are immutable.
  61  *
  62  *
  63  * <h3> URI syntax and components </h3>
  64  *
  65  * At the highest level a URI reference (hereinafter simply "URI") in string
  66  * form has the syntax
  67  *
  68  * <blockquote>
  69  * [<i>scheme</i><b>{@code :}</b>]<i>scheme-specific-part</i>[<b>{@code #}</b><i>fragment</i>]
  70  * </blockquote>
  71  *
  72  * where square brackets [...] delineate optional components and the characters
  73  * <b>{@code :}</b> and <b>{@code #}</b> stand for themselves.
  74  *
  75  * <p> An <i>absolute</i> URI specifies a scheme; a URI that is not absolute is
  76  * said to be <i>relative</i>.  URIs are also classified according to whether
  77  * they are <i>opaque</i> or <i>hierarchical</i>.
  78  *
  79  * <p> An <i>opaque</i> URI is an absolute URI whose scheme-specific part does
  80  * not begin with a slash character ({@code '/'}).  Opaque URIs are not
  81  * subject to further parsing.  Some examples of opaque URIs are:
  82  *
  83  * <blockquote><table cellpadding=0 cellspacing=0 summary="layout">
  84  * <tr><td>{@code mailto:java-net@java.sun.com}</td></tr>
  85  * <tr><td>{@code news:comp.lang.java}</td></tr>
  86  * <tr><td>{@code urn:isbn:096139210x}</td></tr>
  87  * </table></blockquote>
  88  *
  89  * <p> A <i>hierarchical</i> URI is either an absolute URI whose
  90  * scheme-specific part begins with a slash character, or a relative URI, that
  91  * is, a URI that does not specify a scheme.  Some examples of hierarchical
  92  * URIs are:
  93  *
  94  * <blockquote>
  95  * {@code http://example.com/languages/java/}<br>
  96  * {@code sample/a/index.html#28}<br>
  97  * {@code ../../demo/b/index.html}<br>
  98  * {@code file:///~/calendar}
  99  * </blockquote>
 100  *
 101  * <p> A hierarchical URI is subject to further parsing according to the syntax
 102  *
 103  * <blockquote>
 104  * [<i>scheme</i><b>{@code :}</b>][<b>{@code //}</b><i>authority</i>][<i>path</i>][<b>{@code ?}</b><i>query</i>][<b>{@code #}</b><i>fragment</i>]
 105  * </blockquote>
 106  *
 107  * where the characters <b>{@code :}</b>, <b>{@code /}</b>,
 108  * <b>{@code ?}</b>, and <b>{@code #}</b> stand for themselves.  The
 109  * scheme-specific part of a hierarchical URI consists of the characters
 110  * between the scheme and fragment components.
 111  *
 112  * <p> The authority component of a hierarchical URI is, if specified, either
 113  * <i>server-based</i> or <i>registry-based</i>.  A server-based authority
 114  * parses according to the familiar syntax
 115  *
 116  * <blockquote>
 117  * [<i>user-info</i><b>{@code @}</b>]<i>host</i>[<b>{@code :}</b><i>port</i>]
 118  * </blockquote>
 119  *
 120  * where the characters <b>{@code @}</b> and <b>{@code :}</b> stand for
 121  * themselves.  Nearly all URI schemes currently in use are server-based.  An
 122  * authority component that does not parse in this way is considered to be
 123  * registry-based.
 124  *
 125  * <p> The path component of a hierarchical URI is itself said to be absolute
 126  * if it begins with a slash character ({@code '/'}); otherwise it is
 127  * relative.  The path of a hierarchical URI that is either absolute or
 128  * specifies an authority is always absolute.
 129  *
 130  * <p> All told, then, a URI instance has the following nine components:
 131  *
 132  * <blockquote><table summary="Describes the components of a URI:scheme,scheme-specific-part,authority,user-info,host,port,path,query,fragment">
 133  * <tr><th><i>Component</i></th><th><i>Type</i></th></tr>
 134  * <tr><td>scheme</td><td>{@code String}</td></tr>
 135  * <tr><td>scheme-specific-part&nbsp;&nbsp;&nbsp;&nbsp;</td><td>{@code String}</td></tr>
 136  * <tr><td>authority</td><td>{@code String}</td></tr>
 137  * <tr><td>user-info</td><td>{@code String}</td></tr>
 138  * <tr><td>host</td><td>{@code String}</td></tr>
 139  * <tr><td>port</td><td>{@code int}</td></tr>
 140  * <tr><td>path</td><td>{@code String}</td></tr>
 141  * <tr><td>query</td><td>{@code String}</td></tr>
 142  * <tr><td>fragment</td><td>{@code String}</td></tr>
 143  * </table></blockquote>
 144  *
 145  * In a given instance any particular component is either <i>undefined</i> or
 146  * <i>defined</i> with a distinct value.  Undefined string components are
 147  * represented by {@code null}, while undefined integer components are
 148  * represented by {@code -1}.  A string component may be defined to have the
 149  * empty string as its value; this is not equivalent to that component being
 150  * undefined.
 151  *
 152  * <p> Whether a particular component is or is not defined in an instance
 153  * depends upon the type of the URI being represented.  An absolute URI has a
 154  * scheme component.  An opaque URI has a scheme, a scheme-specific part, and
 155  * possibly a fragment, but has no other components.  A hierarchical URI always
 156  * has a path (though it may be empty) and a scheme-specific-part (which at
 157  * least contains the path), and may have any of the other components.  If the
 158  * authority component is present and is server-based then the host component
 159  * will be defined and the user-information and port components may be defined.
 160  *
 161  *
 162  * <h4> Operations on URI instances </h4>
 163  *
 164  * The key operations supported by this class are those of
 165  * <i>normalization</i>, <i>resolution</i>, and <i>relativization</i>.
 166  *
 167  * <p> <i>Normalization</i> is the process of removing unnecessary {@code "."}
 168  * and {@code ".."} segments from the path component of a hierarchical URI.
 169  * Each {@code "."} segment is simply removed.  A {@code ".."} segment is
 170  * removed only if it is preceded by a non-{@code ".."} segment.
 171  * Normalization has no effect upon opaque URIs.
 172  *
 173  * <p> <i>Resolution</i> is the process of resolving one URI against another,
 174  * <i>base</i> URI.  The resulting URI is constructed from components of both
 175  * URIs in the manner specified by RFC&nbsp;2396, taking components from the
 176  * base URI for those not specified in the original.  For hierarchical URIs,
 177  * the path of the original is resolved against the path of the base and then
 178  * normalized.  The result, for example, of resolving
 179  *
 180  * <blockquote>
 181  * {@code sample/a/index.html#28}
 182  * &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
 183  * &nbsp;&nbsp;&nbsp;&nbsp;(1)
 184  * </blockquote>
 185  *
 186  * against the base URI {@code http://example.com/languages/java/} is the result
 187  * URI
 188  *
 189  * <blockquote>
 190  * {@code http://example.com/languages/java/sample/a/index.html#28}
 191  * </blockquote>
 192  *
 193  * Resolving the relative URI
 194  *
 195  * <blockquote>
 196  * {@code ../../demo/b/index.html}&nbsp;&nbsp;&nbsp;&nbsp;(2)
 197  * </blockquote>
 198  *
 199  * against this result yields, in turn,
 200  *
 201  * <blockquote>
 202  * {@code http://example.com/languages/java/demo/b/index.html}
 203  * </blockquote>
 204  *
 205  * Resolution of both absolute and relative URIs, and of both absolute and
 206  * relative paths in the case of hierarchical URIs, is supported.  Resolving
 207  * the URI {@code file:///~calendar} against any other URI simply yields the
 208  * original URI, since it is absolute.  Resolving the relative URI (2) above
 209  * against the relative base URI (1) yields the normalized, but still relative,
 210  * URI
 211  *
 212  * <blockquote>
 213  * {@code demo/b/index.html}
 214  * </blockquote>
 215  *
 216  * <p> <i>Relativization</i>, finally, is the inverse of resolution: For any
 217  * two normalized URIs <i>u</i> and&nbsp;<i>v</i>,
 218  *
 219  * <blockquote>
 220  *   <i>u</i>{@code .relativize(}<i>u</i>{@code .resolve(}<i>v</i>{@code )).equals(}<i>v</i>{@code )}&nbsp;&nbsp;and<br>
 221  *   <i>u</i>{@code .resolve(}<i>u</i>{@code .relativize(}<i>v</i>{@code )).equals(}<i>v</i>{@code )}&nbsp;&nbsp;.<br>
 222  * </blockquote>
 223  *
 224  * This operation is often useful when constructing a document containing URIs
 225  * that must be made relative to the base URI of the document wherever
 226  * possible.  For example, relativizing the URI
 227  *
 228  * <blockquote>
 229  * {@code http://example.com/languages/java/sample/a/index.html#28}
 230  * </blockquote>
 231  *
 232  * against the base URI
 233  *
 234  * <blockquote>
 235  * {@code http://example.com/languages/java/}
 236  * </blockquote>
 237  *
 238  * yields the relative URI {@code sample/a/index.html#28}.
 239  *
 240  *
 241  * <h4> Character categories </h4>
 242  *
 243  * RFC&nbsp;2396 specifies precisely which characters are permitted in the
 244  * various components of a URI reference.  The following categories, most of
 245  * which are taken from that specification, are used below to describe these
 246  * constraints:
 247  *
 248  * <blockquote><table cellspacing=2 summary="Describes categories alpha,digit,alphanum,unreserved,punct,reserved,escaped,and other">
 249  *   <tr><th valign=top><i>alpha</i></th>
 250  *       <td>The US-ASCII alphabetic characters,
 251  *        {@code 'A'}&nbsp;through&nbsp;{@code 'Z'}
 252  *        and {@code 'a'}&nbsp;through&nbsp;{@code 'z'}</td></tr>
 253  *   <tr><th valign=top><i>digit</i></th>
 254  *       <td>The US-ASCII decimal digit characters,
 255  *       {@code '0'}&nbsp;through&nbsp;{@code '9'}</td></tr>
 256  *   <tr><th valign=top><i>alphanum</i></th>
 257  *       <td>All <i>alpha</i> and <i>digit</i> characters</td></tr>
 258  *   <tr><th valign=top><i>unreserved</i>&nbsp;&nbsp;&nbsp;&nbsp;</th>
 259  *       <td>All <i>alphanum</i> characters together with those in the string
 260  *        {@code "_-!.~'()*"}</td></tr>
 261  *   <tr><th valign=top><i>punct</i></th>
 262  *       <td>The characters in the string {@code ",;:$&+="}</td></tr>
 263  *   <tr><th valign=top><i>reserved</i></th>
 264  *       <td>All <i>punct</i> characters together with those in the string
 265  *        {@code "?/[]@"}</td></tr>
 266  *   <tr><th valign=top><i>escaped</i></th>
 267  *       <td>Escaped octets, that is, triplets consisting of the percent
 268  *           character ({@code '%'}) followed by two hexadecimal digits
 269  *           ({@code '0'}-{@code '9'}, {@code 'A'}-{@code 'F'}, and
 270  *           {@code 'a'}-{@code 'f'})</td></tr>
 271  *   <tr><th valign=top><i>other</i></th>
 272  *       <td>The Unicode characters that are not in the US-ASCII character set,
 273  *           are not control characters (according to the {@link
 274  *           java.lang.Character#isISOControl(char) Character.isISOControl}
 275  *           method), and are not space characters (according to the {@link
 276  *           java.lang.Character#isSpaceChar(char) Character.isSpaceChar}
 277  *           method)&nbsp;&nbsp;<i>(<b>Deviation from RFC 2396</b>, which is
 278  *           limited to US-ASCII)</i></td></tr>
 279  * </table></blockquote>
 280  *
 281  * <p><a name="legal-chars"></a> The set of all legal URI characters consists of
 282  * the <i>unreserved</i>, <i>reserved</i>, <i>escaped</i>, and <i>other</i>
 283  * characters.
 284  *
 285  *
 286  * <h4> Escaped octets, quotation, encoding, and decoding </h4>
 287  *
 288  * RFC 2396 allows escaped octets to appear in the user-info, path, query, and
 289  * fragment components.  Escaping serves two purposes in URIs:
 290  *
 291  * <ul>
 292  *
 293  *   <li><p> To <i>encode</i> non-US-ASCII characters when a URI is required to
 294  *   conform strictly to RFC&nbsp;2396 by not containing any <i>other</i>
 295  *   characters.  </p></li>
 296  *
 297  *   <li><p> To <i>quote</i> characters that are otherwise illegal in a
 298  *   component.  The user-info, path, query, and fragment components differ
 299  *   slightly in terms of which characters are considered legal and illegal.
 300  *   </p></li>
 301  *
 302  * </ul>
 303  *
 304  * These purposes are served in this class by three related operations:
 305  *
 306  * <ul>
 307  *
 308  *   <li><p><a name="encode"></a> A character is <i>encoded</i> by replacing it
 309  *   with the sequence of escaped octets that represent that character in the
 310  *   UTF-8 character set.  The Euro currency symbol ({@code '\u005Cu20AC'}),
 311  *   for example, is encoded as {@code "%E2%82%AC"}.  <i>(<b>Deviation from
 312  *   RFC&nbsp;2396</b>, which does not specify any particular character
 313  *   set.)</i> </p></li>
 314  *
 315  *   <li><p><a name="quote"></a> An illegal character is <i>quoted</i> simply by
 316  *   encoding it.  The space character, for example, is quoted by replacing it
 317  *   with {@code "%20"}.  UTF-8 contains US-ASCII, hence for US-ASCII
 318  *   characters this transformation has exactly the effect required by
 319  *   RFC&nbsp;2396. </p></li>
 320  *
 321  *   <li><p><a name="decode"></a>
 322  *   A sequence of escaped octets is <i>decoded</i> by
 323  *   replacing it with the sequence of characters that it represents in the
 324  *   UTF-8 character set.  UTF-8 contains US-ASCII, hence decoding has the
 325  *   effect of de-quoting any quoted US-ASCII characters as well as that of
 326  *   decoding any encoded non-US-ASCII characters.  If a <a
 327  *   href="../nio/charset/CharsetDecoder.html#ce">decoding error</a> occurs
 328  *   when decoding the escaped octets then the erroneous octets are replaced by
 329  *   {@code '\u005CuFFFD'}, the Unicode replacement character.  </p></li>
 330  *
 331  * </ul>
 332  *
 333  * These operations are exposed in the constructors and methods of this class
 334  * as follows:
 335  *
 336  * <ul>
 337  *
 338  *   <li><p> The {@linkplain #URI(java.lang.String) single-argument
 339  *   constructor} requires any illegal characters in its argument to be
 340  *   quoted and preserves any escaped octets and <i>other</i> characters that
 341  *   are present.  </p></li>
 342  *
 343  *   <li><p> The {@linkplain
 344  *   #URI(java.lang.String,java.lang.String,java.lang.String,int,java.lang.String,java.lang.String,java.lang.String)
 345  *   multi-argument constructors} quote illegal characters as
 346  *   required by the components in which they appear.  The percent character
 347  *   ({@code '%'}) is always quoted by these constructors.  Any <i>other</i>
 348  *   characters are preserved.  </p></li>
 349  *
 350  *   <li><p> The {@link #getRawUserInfo() getRawUserInfo}, {@link #getRawPath()
 351  *   getRawPath}, {@link #getRawQuery() getRawQuery}, {@link #getRawFragment()
 352  *   getRawFragment}, {@link #getRawAuthority() getRawAuthority}, and {@link
 353  *   #getRawSchemeSpecificPart() getRawSchemeSpecificPart} methods return the
 354  *   values of their corresponding components in raw form, without interpreting
 355  *   any escaped octets.  The strings returned by these methods may contain
 356  *   both escaped octets and <i>other</i> characters, and will not contain any
 357  *   illegal characters.  </p></li>
 358  *
 359  *   <li><p> The {@link #getUserInfo() getUserInfo}, {@link #getPath()
 360  *   getPath}, {@link #getQuery() getQuery}, {@link #getFragment()
 361  *   getFragment}, {@link #getAuthority() getAuthority}, and {@link
 362  *   #getSchemeSpecificPart() getSchemeSpecificPart} methods decode any escaped
 363  *   octets in their corresponding components.  The strings returned by these
 364  *   methods may contain both <i>other</i> characters and illegal characters,
 365  *   and will not contain any escaped octets.  </p></li>
 366  *
 367  *   <li><p> The {@link #toString() toString} method returns a URI string with
 368  *   all necessary quotation but which may contain <i>other</i> characters.
 369  *   </p></li>
 370  *
 371  *   <li><p> The {@link #toASCIIString() toASCIIString} method returns a fully
 372  *   quoted and encoded URI string that does not contain any <i>other</i>
 373  *   characters.  </p></li>
 374  *
 375  * </ul>
 376  *
 377  *
 378  * <h4> Identities </h4>
 379  *
 380  * For any URI <i>u</i>, it is always the case that
 381  *
 382  * <blockquote>
 383  * {@code new URI(}<i>u</i>{@code .toString()).equals(}<i>u</i>{@code )}&nbsp;.
 384  * </blockquote>
 385  *
 386  * For any URI <i>u</i> that does not contain redundant syntax such as two
 387  * slashes before an empty authority (as in {@code file:///tmp/}&nbsp;) or a
 388  * colon following a host name but no port (as in
 389  * {@code http://java.sun.com:}&nbsp;), and that does not encode characters
 390  * except those that must be quoted, the following identities also hold:
 391  * <pre>
 392  *     new URI(<i>u</i>.getScheme(),
 393  *             <i>u</i>.getSchemeSpecificPart(),
 394  *             <i>u</i>.getFragment())
 395  *     .equals(<i>u</i>)</pre>
 396  * in all cases,
 397  * <pre>
 398  *     new URI(<i>u</i>.getScheme(),
 399  *             <i>u</i>.getAuthority(),
 400  *             <i>u</i>.getPath(), <i>u</i>.getQuery(),
 401  *             <i>u</i>.getFragment())
 402  *     .equals(<i>u</i>)</pre>
 403  * if <i>u</i> is hierarchical, and
 404  * <pre>
 405  *     new URI(<i>u</i>.getScheme(),
 406  *             <i>u</i>.getUserInfo(), <i>u</i>.getHost(), <i>u</i>.getPort(),
 407  *             <i>u</i>.getPath(), <i>u</i>.getQuery(),
 408  *             <i>u</i>.getFragment())
 409  *     .equals(<i>u</i>)</pre>
 410  * if <i>u</i> is hierarchical and has either no authority or a server-based
 411  * authority.
 412  *
 413  *
 414  * <h4> URIs, URLs, and URNs </h4>
 415  *
 416  * A URI is a uniform resource <i>identifier</i> while a URL is a uniform
 417  * resource <i>locator</i>.  Hence every URL is a URI, abstractly speaking, but
 418  * not every URI is a URL.  This is because there is another subcategory of
 419  * URIs, uniform resource <i>names</i> (URNs), which name resources but do not
 420  * specify how to locate them.  The {@code mailto}, {@code news}, and
 421  * {@code isbn} URIs shown above are examples of URNs.
 422  *
 423  * <p> The conceptual distinction between URIs and URLs is reflected in the
 424  * differences between this class and the {@link URL} class.
 425  *
 426  * <p> An instance of this class represents a URI reference in the syntactic
 427  * sense defined by RFC&nbsp;2396.  A URI may be either absolute or relative.
 428  * A URI string is parsed according to the generic syntax without regard to the
 429  * scheme, if any, that it specifies.  No lookup of the host, if any, is
 430  * performed, and no scheme-dependent stream handler is constructed.  Equality,
 431  * hashing, and comparison are defined strictly in terms of the character
 432  * content of the instance.  In other words, a URI instance is little more than
 433  * a structured string that supports the syntactic, scheme-independent
 434  * operations of comparison, normalization, resolution, and relativization.
 435  *
 436  * <p> An instance of the {@link URL} class, by contrast, represents the
 437  * syntactic components of a URL together with some of the information required
 438  * to access the resource that it describes.  A URL must be absolute, that is,
 439  * it must always specify a scheme.  A URL string is parsed according to its
 440  * scheme.  A stream handler is always established for a URL, and in fact it is
 441  * impossible to create a URL instance for a scheme for which no handler is
 442  * available.  Equality and hashing depend upon both the scheme and the
 443  * Internet address of the host, if any; comparison is not defined.  In other
 444  * words, a URL is a structured string that supports the syntactic operation of
 445  * resolution as well as the network I/O operations of looking up the host and
 446  * opening a connection to the specified resource.
 447  *
 448  *
 449  * @author Mark Reinhold
 450  * @since 1.4
 451  *
 452  * @see <a href="http://www.ietf.org/rfc/rfc2279.txt"><i>RFC&nbsp;2279: UTF-8, a
 453  * transformation format of ISO 10646</i></a>, <br><a
 454  * href="http://www.ietf.org/rfc/rfc2373.txt"><i>RFC&nbsp;2373: IPv6 Addressing
 455  * Architecture</i></a>, <br><a
 456  * href="http://www.ietf.org/rfc/rfc2396.txt"><i>RFC&nbsp;2396: Uniform
 457  * Resource Identifiers (URI): Generic Syntax</i></a>, <br><a
 458  * href="http://www.ietf.org/rfc/rfc2732.txt"><i>RFC&nbsp;2732: Format for
 459  * Literal IPv6 Addresses in URLs</i></a>, <br><a
 460  * href="URISyntaxException.html">URISyntaxException</a>
 461  */
 462 
 463 public final class URI
 464     implements Comparable<URI>, Serializable
 465 {
 466 
 467     // Note: Comments containing the word "ASSERT" indicate places where a
 468     // throw of an InternalError should be replaced by an appropriate assertion
 469     // statement once asserts are enabled in the build.
 470 
 471     static final long serialVersionUID = -6052424284110960213L;  // Explicit serialization version id
 472 
 473 
 474     // -- Properties and components of this instance --
         //
         // Every field below except 'string' is declared transient, so 'string'
         // is the only per-instance state written by default serialization.
         // Undefined string components are null; an undefined port is -1.
 475 
 476     // Components of all URIs: [<scheme>:]<scheme-specific-part>[#<fragment>]
 477     private transient String scheme;            // null ==> relative URI
 478     private transient String fragment;          // null ==> no fragment
 479 
 480     // Hierarchical URI components: [//<authority>]<path>[?<query>]
 481     private transient String authority;         // Registry or server
 482 
 483     // Server-based authority: [<userInfo>@]<host>[:<port>]
 484     private transient String userInfo;          // null ==> undefined
 485     private transient String host;              // null ==> registry-based
 486     private transient int port = -1;            // -1 ==> undefined
 487 
 488     // Remaining components of hierarchical URIs
 489     private transient String path;              // null ==> opaque
 490     private transient String query;             // null ==> undefined
 491 
 492     // The remaining fields may be computed on demand, which is safe even in
 493     // the face of multiple threads racing to initialize them
 494     private transient String schemeSpecificPart;
 495     private transient int hash;        // Zero ==> undefined
 496 
         // Judging by their names, these hold the decoded (escaped octets
         // resolved) forms of the corresponding raw components above and are
         // filled in lazily -- the code that populates them is not visible in
         // this part of the file; confirm against the getters.
 497     private transient String decodedUserInfo;
 498     private transient String decodedAuthority;
 499     private transient String decodedPath;
 500     private transient String decodedQuery;
 501     private transient String decodedFragment;
 502     private transient String decodedSchemeSpecificPart;
 503 
 504     /**
 505      * The string form of this URI.
 506      *
 507      * @serial
 508      */
 509     private volatile String string;             // The only serializable field
         // NOTE(review): 'string' is the only field declared volatile; the
         // reason is not visible in this part of the file.
 510 
 511 
 512 
 513     // -- Constructors and factories --
 514 
 515     private URI() { }                           // Used internally; assigns nothing, so fields keep their declared defaults (port == -1)
 516 
 517     /**
 518      * Constructs a URI by parsing the given string.
 519      *
 520      * <p> This constructor parses the given string exactly as specified by the
 521      * grammar in <a
 522      * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
 523      * Appendix&nbsp;A, <b><i>except for the following deviations:</i></b> </p>
 524      *
 525      * <ul>
 526      *
 527      *   <li><p> An empty authority component is permitted as long as it is
 528      *   followed by a non-empty path, a query component, or a fragment
 529      *   component.  This allows the parsing of URIs such as
 530      *   {@code "file:///foo/bar"}, which seems to be the intent of
 531      *   RFC&nbsp;2396 although the grammar does not permit it.  If the
 532      *   authority component is empty then the user-information, host, and port
 533      *   components are undefined. </p></li>
 534      *
 535      *   <li><p> Empty relative paths are permitted; this seems to be the
 536      *   intent of RFC&nbsp;2396 although the grammar does not permit it.  The
 537      *   primary consequence of this deviation is that a standalone fragment
 538      *   such as {@code "#foo"} parses as a relative URI with an empty path
 539      *   and the given fragment, and can be usefully <a
 540      *   href="#resolve-frag">resolved</a> against a base URI. </p></li>
 541      *
 542      *   <li><p> IPv4 addresses in host components are parsed rigorously, as
 543      *   specified by <a
 544      *   href="http://www.ietf.org/rfc/rfc2732.txt">RFC&nbsp;2732</a>: Each
 545      *   element of a dotted-quad address must contain no more than three
 546      *   decimal digits.  Each element is further constrained to have a value
 547      *   no greater than 255. </p></li>
 548      *
 549      *   <li> <p> Hostnames in host components that comprise only a single
 550      *   domain label are permitted to start with an <i>alphanum</i>
 551      *   character. This seems to be the intent of <a
 552      *   href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>
 553      *   section&nbsp;3.2.2 although the grammar does not permit it. The
 554      *   consequence of this deviation is that the authority component of a
 555      *   hierarchical URI such as {@code s://123}, will parse as a server-based
 556      *   authority. </p></li>
 557      *
 558      *   <li><p> IPv6 addresses are permitted for the host component.  An IPv6
 559      *   address must be enclosed in square brackets ({@code '['} and
 560      *   {@code ']'}) as specified by <a
 561      *   href="http://www.ietf.org/rfc/rfc2732.txt">RFC&nbsp;2732</a>.  The
 562      *   IPv6 address itself must parse according to <a
 563      *   href="http://www.ietf.org/rfc/rfc2373.txt">RFC&nbsp;2373</a>.  IPv6
 564      *   addresses are further constrained to describe no more than sixteen
 565      *   bytes of address information, a constraint implicit in RFC&nbsp;2373
 566      *   but not expressible in the grammar. </p></li>
 567      *
 568      *   <li><p> Characters in the <i>other</i> category are permitted wherever
 569      *   RFC&nbsp;2396 permits <i>escaped</i> octets, that is, in the
 570      *   user-information, path, query, and fragment components, as well as in
 571      *   the authority component if the authority is registry-based.  This
 572      *   allows URIs to contain Unicode characters beyond those in the US-ASCII
 573      *   character set. </p></li>
 574      *
 575      * </ul>
 576      *
 577      * @param  str   The string to be parsed into a URI
 578      *
 579      * @throws  NullPointerException
 580      *          If {@code str} is {@code null}
 581      *
 582      * @throws  URISyntaxException
 583      *          If the given string violates RFC&nbsp;2396, as augmented
 584      *          by the above deviations
 585      */
 586     public URI(String str) throws URISyntaxException {
             // Parses the given string into this instance.  No field is assigned
             // here and the value of the parse call is discarded, so the components
             // are presumably filled in by Parser as a side effect (Parser is not
             // visible in this part of the file -- confirm).
             // The 'false' flag presumably means "do not require a server-based
             // authority"; the seven-argument constructor passes 'true' instead.
 587         new Parser(str).parse(false);
 588     }
 589 
 590     /**
 591      * Constructs a hierarchical URI from the given components.
 592      *
 593      * <p> If a scheme is given then the path, if also given, must either be
 594      * empty or begin with a slash character ({@code '/'}).  Otherwise a
 595      * component of the new URI may be left undefined by passing {@code null}
 596      * for the corresponding parameter or, in the case of the {@code port}
 597      * parameter, by passing {@code -1}.
 598      *
 599      * <p> This constructor first builds a URI string from the given components
 600      * according to the rules specified in <a
 601      * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
 602      * section&nbsp;5.2, step&nbsp;7: </p>
 603      *
 604      * <ol>
 605      *
 606      *   <li><p> Initially, the result string is empty. </p></li>
 607      *
 608      *   <li><p> If a scheme is given then it is appended to the result,
 609      *   followed by a colon character ({@code ':'}).  </p></li>
 610      *
 611      *   <li><p> If user information, a host, or a port are given then the
 612      *   string {@code "//"} is appended.  </p></li>
 613      *
 614      *   <li><p> If user information is given then it is appended, followed by
 615      *   a commercial-at character ({@code '@'}).  Any character not in the
 616      *   <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
 617      *   categories is <a href="#quote">quoted</a>.  </p></li>
 618      *
 619      *   <li><p> If a host is given then it is appended.  If the host is a
 620      *   literal IPv6 address but is not enclosed in square brackets
 621      *   ({@code '['} and {@code ']'}) then the square brackets are added.
 622      *   </p></li>
 623      *
 624      *   <li><p> If a port number is given then a colon character
 625      *   ({@code ':'}) is appended, followed by the port number in decimal.
 626      *   </p></li>
 627      *
 628      *   <li><p> If a path is given then it is appended.  Any character not in
 629      *   the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
 630      *   categories, and not equal to the slash character ({@code '/'}) or the
 631      *   commercial-at character ({@code '@'}), is quoted.  </p></li>
 632      *
 633      *   <li><p> If a query is given then a question-mark character
 634      *   ({@code '?'}) is appended, followed by the query.  Any character that
 635      *   is not a <a href="#legal-chars">legal URI character</a> is quoted.
 636      *   </p></li>
 637      *
 638      *   <li><p> Finally, if a fragment is given then a hash character
 639      *   ({@code '#'}) is appended, followed by the fragment.  Any character
 640      *   that is not a legal URI character is quoted.  </p></li>
 641      *
 642      * </ol>
 643      *
 644      * <p> The resulting URI string is then parsed as if by invoking the {@link
 645      * #URI(String)} constructor and then invoking the {@link
 646      * #parseServerAuthority()} method upon the result; this may cause a {@link
 647      * URISyntaxException} to be thrown.  </p>
 648      *
 649      * @param   scheme    Scheme name
 650      * @param   userInfo  User name and authorization information
 651      * @param   host      Host name
 652      * @param   port      Port number
 653      * @param   path      Path
 654      * @param   query     Query
 655      * @param   fragment  Fragment
 656      *
 657      * @throws URISyntaxException
 658      *         If both a scheme and a path are given but the path is relative,
 659      *         if the URI string constructed from the given components violates
 660      *         RFC&nbsp;2396, or if the authority component of the string is
 661      *         present but cannot be parsed as a server-based authority
 662      */
 663     public URI(String scheme,
 664                String userInfo, String host, int port,
 665                String path, String query, String fragment)
 666         throws URISyntaxException
 667     {
 668         String s = toString(scheme, null,
 669                             null, userInfo, host, port,
 670                             path, query, fragment);
 671         checkPath(s, scheme, path);
 672         new Parser(s).parse(true);
 673     }
 674 
 675     /**
 676      * Constructs a hierarchical URI from the given components.
 677      *
 678      * <p> If a scheme is given then the path, if also given, must either be
 679      * empty or begin with a slash character ({@code '/'}).  Otherwise a
 680      * component of the new URI may be left undefined by passing {@code null}
 681      * for the corresponding parameter.
 682      *
 683      * <p> This constructor first builds a URI string from the given components
 684      * according to the rules specified in <a
 685      * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
 686      * section&nbsp;5.2, step&nbsp;7: </p>
 687      *
 688      * <ol>
 689      *
 690      *   <li><p> Initially, the result string is empty.  </p></li>
 691      *
 692      *   <li><p> If a scheme is given then it is appended to the result,
 693      *   followed by a colon character ({@code ':'}).  </p></li>
 694      *
 695      *   <li><p> If an authority is given then the string {@code "//"} is
 696      *   appended, followed by the authority.  If the authority contains a
 697      *   literal IPv6 address then the address must be enclosed in square
 698      *   brackets ({@code '['} and {@code ']'}).  Any character not in the
 699      *   <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
 700      *   categories, and not equal to the commercial-at character
 701      *   ({@code '@'}), is <a href="#quote">quoted</a>.  </p></li>
 702      *
 703      *   <li><p> If a path is given then it is appended.  Any character not in
 704      *   the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
 705      *   categories, and not equal to the slash character ({@code '/'}) or the
 706      *   commercial-at character ({@code '@'}), is quoted.  </p></li>
 707      *
 708      *   <li><p> If a query is given then a question-mark character
 709      *   ({@code '?'}) is appended, followed by the query.  Any character that
 710      *   is not a <a href="#legal-chars">legal URI character</a> is quoted.
 711      *   </p></li>
 712      *
 713      *   <li><p> Finally, if a fragment is given then a hash character
 714      *   ({@code '#'}) is appended, followed by the fragment.  Any character
 715      *   that is not a legal URI character is quoted.  </p></li>
 716      *
 717      * </ol>
 718      *
 719      * <p> The resulting URI string is then parsed as if by invoking the {@link
 720      * #URI(String)} constructor and then invoking the {@link
 721      * #parseServerAuthority()} method upon the result; this may cause a {@link
 722      * URISyntaxException} to be thrown.  </p>
 723      *
 724      * @param   scheme     Scheme name
 725      * @param   authority  Authority
 726      * @param   path       Path
 727      * @param   query      Query
 728      * @param   fragment   Fragment
 729      *
 730      * @throws URISyntaxException
 731      *         If both a scheme and a path are given but the path is relative,
 732      *         if the URI string constructed from the given components violates
 733      *         RFC&nbsp;2396, or if the authority component of the string is
 734      *         present but cannot be parsed as a server-based authority
 735      */
 736     public URI(String scheme,
 737                String authority,
 738                String path, String query, String fragment)
 739         throws URISyntaxException
 740     {
 741         String s = toString(scheme, null,
 742                             authority, null, null, -1,
 743                             path, query, fragment);
 744         checkPath(s, scheme, path);
 745         new Parser(s).parse(false);
 746     }
 747 
 748     /**
 749      * Constructs a hierarchical URI from the given components.
 750      *
 751      * <p> A component may be left undefined by passing {@code null}.
 752      *
 753      * <p> This convenience constructor works as if by invoking the
 754      * seven-argument constructor as follows:
 755      *
 756      * <blockquote>
 757      * {@code new} {@link #URI(String, String, String, int, String, String, String)
 758      * URI}{@code (scheme, null, host, -1, path, null, fragment);}
 759      * </blockquote>
 760      *
 761      * @param   scheme    Scheme name
 762      * @param   host      Host name
 763      * @param   path      Path
 764      * @param   fragment  Fragment
 765      *
 766      * @throws  URISyntaxException
 767      *          If the URI string constructed from the given components
 768      *          violates RFC&nbsp;2396
 769      */
 770     public URI(String scheme, String host, String path, String fragment)
 771         throws URISyntaxException
 772     {
 773         this(scheme, null, host, -1, path, null, fragment);
 774     }
 775 
 776     /**
 777      * Constructs a URI from the given components.
 778      *
 779      * <p> A component may be left undefined by passing {@code null}.
 780      *
 781      * <p> This constructor first builds a URI in string form using the given
 782      * components as follows:  </p>
 783      *
 784      * <ol>
 785      *
 786      *   <li><p> Initially, the result string is empty.  </p></li>
 787      *
 788      *   <li><p> If a scheme is given then it is appended to the result,
 789      *   followed by a colon character ({@code ':'}).  </p></li>
 790      *
 791      *   <li><p> If a scheme-specific part is given then it is appended.  Any
 792      *   character that is not a <a href="#legal-chars">legal URI character</a>
 793      *   is <a href="#quote">quoted</a>.  </p></li>
 794      *
 795      *   <li><p> Finally, if a fragment is given then a hash character
 796      *   ({@code '#'}) is appended to the string, followed by the fragment.
 797      *   Any character that is not a legal URI character is quoted.  </p></li>
 798      *
 799      * </ol>
 800      *
 801      * <p> The resulting URI string is then parsed in order to create the new
 802      * URI instance as if by invoking the {@link #URI(String)} constructor;
 803      * this may cause a {@link URISyntaxException} to be thrown.  </p>
 804      *
 805      * @param   scheme    Scheme name
 806      * @param   ssp       Scheme-specific part
 807      * @param   fragment  Fragment
 808      *
 809      * @throws  URISyntaxException
 810      *          If the URI string constructed from the given components
 811      *          violates RFC&nbsp;2396
 812      */
 813     public URI(String scheme, String ssp, String fragment)
 814         throws URISyntaxException
 815     {
 816         new Parser(toString(scheme, ssp,
 817                             null, null, null, -1,
 818                             null, null, fragment))
 819             .parse(false);
 820     }
 821 
 822     /**
 823      * Creates a URI by parsing the given string.
 824      *
 825      * <p> This convenience factory method works as if by invoking the {@link
 826      * #URI(String)} constructor; any {@link URISyntaxException} thrown by the
 827      * constructor is caught and wrapped in a new {@link
 828      * IllegalArgumentException} object, which is then thrown.
 829      *
     * <p> This method is provided for use in situations where it is known that
     * the given string is a legal URI, for example for URI constants declared
     * within a program, and so it would be considered a programming error
     * for the string not to parse as such.  The constructors, which throw
     * {@link URISyntaxException} directly, should be used in situations where
     * a URI is being constructed from user input or from some other source
     * that may be prone to errors.  </p>
 837      *
 838      * @param  str   The string to be parsed into a URI
 839      * @return The new URI
 840      *
 841      * @throws  NullPointerException
 842      *          If {@code str} is {@code null}
 843      *
 844      * @throws  IllegalArgumentException
 845      *          If the given string violates RFC&nbsp;2396
 846      */
 847     public static URI create(String str) {
 848         try {
 849             return new URI(str);
 850         } catch (URISyntaxException x) {
 851             throw new IllegalArgumentException(x.getMessage(), x);
 852         }
 853     }
 854 
 855 
 856     // -- Operations --
 857 
 858     /**
 859      * Attempts to parse this URI's authority component, if defined, into
 860      * user-information, host, and port components.
 861      *
 862      * <p> If this URI's authority component has already been recognized as
 863      * being server-based then it will already have been parsed into
 864      * user-information, host, and port components.  In this case, or if this
 865      * URI has no authority component, this method simply returns this URI.
 866      *
 867      * <p> Otherwise this method attempts once more to parse the authority
 868      * component into user-information, host, and port components, and throws
 869      * an exception describing why the authority component could not be parsed
 870      * in that way.
 871      *
 872      * <p> This method is provided because the generic URI syntax specified in
 873      * <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>
 874      * cannot always distinguish a malformed server-based authority from a
 875      * legitimate registry-based authority.  It must therefore treat some
 876      * instances of the former as instances of the latter.  The authority
 877      * component in the URI string {@code "//foo:bar"}, for example, is not a
 878      * legal server-based authority but it is legal as a registry-based
 879      * authority.
 880      *
     * <p> In many common situations, for example when working with URIs that
     * are known to be either URNs or URLs, the hierarchical URIs being used
     * will always be server-based.  They therefore must either be parsed as
     * such or treated as an error.  In these cases a statement such as
 885      *
 886      * <blockquote>
 887      * {@code URI }<i>u</i>{@code  = new URI(str).parseServerAuthority();}
 888      * </blockquote>
 889      *
 890      * <p> can be used to ensure that <i>u</i> always refers to a URI that, if
 891      * it has an authority component, has a server-based authority with proper
 892      * user-information, host, and port components.  Invoking this method also
 893      * ensures that if the authority could not be parsed in that way then an
 894      * appropriate diagnostic message can be issued based upon the exception
 895      * that is thrown. </p>
 896      *
 897      * @return  A URI whose authority field has been parsed
 898      *          as a server-based authority
 899      *
 900      * @throws  URISyntaxException
 901      *          If the authority component of this URI is defined
 902      *          but cannot be parsed as a server-based authority
 903      *          according to RFC&nbsp;2396
 904      */
 905     public URI parseServerAuthority()
 906         throws URISyntaxException
 907     {
 908         // We could be clever and cache the error message and index from the
 909         // exception thrown during the original parse, but that would require
 910         // either more fields or a more-obscure representation.
 911         if ((host != null) || (authority == null))
 912             return this;
 913         new Parser(toString()).parse(true);
 914         return this;
 915     }
 916 
 917     /**
 918      * Normalizes this URI's path.
 919      *
 920      * <p> If this URI is opaque, or if its path is already in normal form,
 921      * then this URI is returned.  Otherwise a new URI is constructed that is
 922      * identical to this URI except that its path is computed by normalizing
 923      * this URI's path in a manner consistent with <a
 924      * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
 925      * section&nbsp;5.2, step&nbsp;6, sub-steps&nbsp;c through&nbsp;f; that is:
 926      * </p>
 927      *
 928      * <ol>
 929      *
 930      *   <li><p> All {@code "."} segments are removed. </p></li>
 931      *
 932      *   <li><p> If a {@code ".."} segment is preceded by a non-{@code ".."}
 933      *   segment then both of these segments are removed.  This step is
 934      *   repeated until it is no longer applicable. </p></li>
 935      *
 936      *   <li><p> If the path is relative, and if its first segment contains a
 937      *   colon character ({@code ':'}), then a {@code "."} segment is
 938      *   prepended.  This prevents a relative URI with a path such as
 939      *   {@code "a:b/c/d"} from later being re-parsed as an opaque URI with a
 940      *   scheme of {@code "a"} and a scheme-specific part of {@code "b/c/d"}.
 941      *   <b><i>(Deviation from RFC&nbsp;2396)</i></b> </p></li>
 942      *
 943      * </ol>
 944      *
 945      * <p> A normalized path will begin with one or more {@code ".."} segments
 946      * if there were insufficient non-{@code ".."} segments preceding them to
 947      * allow their removal.  A normalized path will begin with a {@code "."}
 948      * segment if one was inserted by step 3 above.  Otherwise, a normalized
 949      * path will not contain any {@code "."} or {@code ".."} segments. </p>
 950      *
 951      * @return  A URI equivalent to this URI,
 952      *          but whose path is in normal form
 953      */
 954     public URI normalize() {
 955         return normalize(this);
 956     }
 957 
 958     /**
 959      * Resolves the given URI against this URI.
 960      *
 961      * <p> If the given URI is already absolute, or if this URI is opaque, then
 962      * the given URI is returned.
 963      *
 964      * <p><a name="resolve-frag"></a> If the given URI's fragment component is
 965      * defined, its path component is empty, and its scheme, authority, and
 966      * query components are undefined, then a URI with the given fragment but
 967      * with all other components equal to those of this URI is returned.  This
 968      * allows a URI representing a standalone fragment reference, such as
 969      * {@code "#foo"}, to be usefully resolved against a base URI.
 970      *
 971      * <p> Otherwise this method constructs a new hierarchical URI in a manner
 972      * consistent with <a
 973      * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
 974      * section&nbsp;5.2; that is: </p>
 975      *
 976      * <ol>
 977      *
 978      *   <li><p> A new URI is constructed with this URI's scheme and the given
 979      *   URI's query and fragment components. </p></li>
 980      *
 981      *   <li><p> If the given URI has an authority component then the new URI's
 982      *   authority and path are taken from the given URI. </p></li>
 983      *
 984      *   <li><p> Otherwise the new URI's authority component is copied from
 985      *   this URI, and its path is computed as follows: </p>
 986      *
 987      *   <ol>
 988      *
 989      *     <li><p> If the given URI's path is absolute then the new URI's path
 990      *     is taken from the given URI. </p></li>
 991      *
 992      *     <li><p> Otherwise the given URI's path is relative, and so the new
 993      *     URI's path is computed by resolving the path of the given URI
 994      *     against the path of this URI.  This is done by concatenating all but
 995      *     the last segment of this URI's path, if any, with the given URI's
 996      *     path and then normalizing the result as if by invoking the {@link
 997      *     #normalize() normalize} method. </p></li>
 998      *
 999      *   </ol></li>
1000      *
1001      * </ol>
1002      *
1003      * <p> The result of this method is absolute if, and only if, either this
1004      * URI is absolute or the given URI is absolute.  </p>
1005      *
1006      * @param  uri  The URI to be resolved against this URI
1007      * @return The resulting URI
1008      *
1009      * @throws  NullPointerException
1010      *          If {@code uri} is {@code null}
1011      */
1012     public URI resolve(URI uri) {
1013         return resolve(this, uri);
1014     }
1015 
1016     /**
1017      * Constructs a new URI by parsing the given string and then resolving it
1018      * against this URI.
1019      *
1020      * <p> This convenience method works as if invoking it were equivalent to
1021      * evaluating the expression {@link #resolve(java.net.URI)
1022      * resolve}{@code (URI.}{@link #create(String) create}{@code (str))}. </p>
1023      *
1024      * @param  str   The string to be parsed into a URI
1025      * @return The resulting URI
1026      *
1027      * @throws  NullPointerException
1028      *          If {@code str} is {@code null}
1029      *
1030      * @throws  IllegalArgumentException
1031      *          If the given string violates RFC&nbsp;2396
1032      */
1033     public URI resolve(String str) {
1034         return resolve(URI.create(str));
1035     }
1036 
1037     /**
1038      * Relativizes the given URI against this URI.
1039      *
1040      * <p> The relativization of the given URI against this URI is computed as
1041      * follows: </p>
1042      *
1043      * <ol>
1044      *
1045      *   <li><p> If either this URI or the given URI are opaque, or if the
1046      *   scheme and authority components of the two URIs are not identical, or
1047      *   if the path of this URI is not a prefix of the path of the given URI,
1048      *   then the given URI is returned. </p></li>
1049      *
1050      *   <li><p> Otherwise a new relative hierarchical URI is constructed with
1051      *   query and fragment components taken from the given URI and with a path
1052      *   component computed by removing this URI's path from the beginning of
1053      *   the given URI's path. </p></li>
1054      *
1055      * </ol>
1056      *
1057      * @param  uri  The URI to be relativized against this URI
1058      * @return The resulting URI
1059      *
1060      * @throws  NullPointerException
1061      *          If {@code uri} is {@code null}
1062      */
1063     public URI relativize(URI uri) {
1064         return relativize(this, uri);
1065     }
1066 
1067     /**
1068      * Constructs a URL from this URI.
1069      *
1070      * <p> This convenience method works as if invoking it were equivalent to
1071      * evaluating the expression {@code new URL(this.toString())} after
1072      * first checking that this URI is absolute. </p>
1073      *
1074      * @return  A URL constructed from this URI
1075      *
     * @throws  IllegalArgumentException
     *          If this URI is not absolute
1078      *
1079      * @throws  MalformedURLException
1080      *          If a protocol handler for the URL could not be found,
1081      *          or if some other error occurred while constructing the URL
1082      */
1083     public URL toURL() throws MalformedURLException {
1084         return URL.fromURI(this);
1085     }
1086 
1087     // -- Component access methods --
1088 
1089     /**
1090      * Returns the scheme component of this URI.
1091      *
     * <p> The scheme component of a URI, if defined, only contains characters
     * in the <i>alphanum</i> category and in the string {@code "-.+"}.  A
     * scheme always starts with an <i>alpha</i> character. </p>
     *
     * <p> The scheme component of a URI cannot contain escaped octets, hence
     * this method does not perform any decoding. </p>
1098      *
1099      * @return  The scheme component of this URI,
1100      *          or {@code null} if the scheme is undefined
1101      */
1102     public String getScheme() {
1103         return scheme;
1104     }
1105 
1106     /**
1107      * Tells whether or not this URI is absolute.
1108      *
1109      * <p> A URI is absolute if, and only if, it has a scheme component. </p>
1110      *
1111      * @return  {@code true} if, and only if, this URI is absolute
1112      */
1113     public boolean isAbsolute() {
1114         return scheme != null;
1115     }
1116 
1117     /**
1118      * Tells whether or not this URI is opaque.
1119      *
     * <p> A URI is opaque if, and only if, it is absolute and its
     * scheme-specific part does not begin with a slash character
     * ({@code '/'}).  An opaque URI has a scheme, a scheme-specific part, and
     * possibly a fragment; all other components are undefined. </p>
1124      *
1125      * @return  {@code true} if, and only if, this URI is opaque
1126      */
1127     public boolean isOpaque() {
1128         return path == null;
1129     }
1130 
1131     /**
1132      * Returns the raw scheme-specific part of this URI.  The scheme-specific
1133      * part is never undefined, though it may be empty.
1134      *
1135      * <p> The scheme-specific part of a URI only contains legal URI
1136      * characters. </p>
1137      *
1138      * @return  The raw scheme-specific part of this URI
1139      *          (never {@code null})
1140      */
1141     public String getRawSchemeSpecificPart() {
1142         String part = schemeSpecificPart;
1143         if (part != null) {
1144             return part;
1145         }
1146 
1147         String s = string;
1148         if (s != null) {
1149             // if string is defined, components will have been parsed
1150             int start = 0;
1151             int end = s.length();
1152             if (scheme != null) {
1153                 start = scheme.length() + 1;
1154             }
1155             if (fragment != null) {
1156                 end -= fragment.length() + 1;
1157             }
1158             if (path != null && path.length() == end - start) {
1159                 part = path;
1160             } else {
1161                 part = s.substring(start, end);
1162             }
1163         } else {
1164             StringBuilder sb = new StringBuilder();
1165             appendSchemeSpecificPart(sb, null, getAuthority(), getUserInfo(),
1166                                  host, port, getPath(), getQuery());
1167             part = sb.toString();
1168         }
1169         return schemeSpecificPart = part;
1170     }
1171 
1172     /**
1173      * Returns the decoded scheme-specific part of this URI.
1174      *
1175      * <p> The string returned by this method is equal to that returned by the
1176      * {@link #getRawSchemeSpecificPart() getRawSchemeSpecificPart} method
1177      * except that all sequences of escaped octets are <a
1178      * href="#decode">decoded</a>.  </p>
1179      *
1180      * @return  The decoded scheme-specific part of this URI
1181      *          (never {@code null})
1182      */
1183     public String getSchemeSpecificPart() {
1184         String part = decodedSchemeSpecificPart;
1185         if (part == null) {
1186             decodedSchemeSpecificPart = part = decode(getRawSchemeSpecificPart());
1187         }
1188         return part;
1189     }
1190 
1191     /**
1192      * Returns the raw authority component of this URI.
1193      *
1194      * <p> The authority component of a URI, if defined, only contains the
1195      * commercial-at character ({@code '@'}) and characters in the
1196      * <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, and <i>other</i>
1197      * categories.  If the authority is server-based then it is further
1198      * constrained to have valid user-information, host, and port
1199      * components. </p>
1200      *
1201      * @return  The raw authority component of this URI,
1202      *          or {@code null} if the authority is undefined
1203      */
1204     public String getRawAuthority() {
1205         return authority;
1206     }
1207 
1208     /**
1209      * Returns the decoded authority component of this URI.
1210      *
1211      * <p> The string returned by this method is equal to that returned by the
1212      * {@link #getRawAuthority() getRawAuthority} method except that all
1213      * sequences of escaped octets are <a href="#decode">decoded</a>.  </p>
1214      *
1215      * @return  The decoded authority component of this URI,
1216      *          or {@code null} if the authority is undefined
1217      */
1218     public String getAuthority() {
1219         String auth = decodedAuthority;
1220         if ((auth == null) && (authority != null)) {
1221             decodedAuthority = auth = decode(authority);
1222         }
1223         return auth;
1224     }
1225 
1226     /**
1227      * Returns the raw user-information component of this URI.
1228      *
1229      * <p> The user-information component of a URI, if defined, only contains
1230      * characters in the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, and
1231      * <i>other</i> categories. </p>
1232      *
1233      * @return  The raw user-information component of this URI,
1234      *          or {@code null} if the user information is undefined
1235      */
1236     public String getRawUserInfo() {
1237         return userInfo;
1238     }
1239 
1240     /**
1241      * Returns the decoded user-information component of this URI.
1242      *
1243      * <p> The string returned by this method is equal to that returned by the
1244      * {@link #getRawUserInfo() getRawUserInfo} method except that all
1245      * sequences of escaped octets are <a href="#decode">decoded</a>.  </p>
1246      *
1247      * @return  The decoded user-information component of this URI,
1248      *          or {@code null} if the user information is undefined
1249      */
1250     public String getUserInfo() {
1251         String user = decodedUserInfo;
1252         if ((user == null) && (userInfo != null)) {
1253             decodedUserInfo = user = decode(userInfo);
1254         }
1255         return user;
1256     }
1257 
1258     /**
1259      * Returns the host component of this URI.
1260      *
1261      * <p> The host component of a URI, if defined, will have one of the
1262      * following forms: </p>
1263      *
1264      * <ul>
1265      *
1266      *   <li><p> A domain name consisting of one or more <i>labels</i>
1267      *   separated by period characters ({@code '.'}), optionally followed by
1268      *   a period character.  Each label consists of <i>alphanum</i> characters
1269      *   as well as hyphen characters ({@code '-'}), though hyphens never
     *   occur as the first or last characters in a label. The rightmost
     *   label of a domain name consisting of two or more labels begins
     *   with an <i>alpha</i> character. </p></li>
1273      *
1274      *   <li><p> A dotted-quad IPv4 address of the form
1275      *   <i>digit</i>{@code +.}<i>digit</i>{@code +.}<i>digit</i>{@code +.}<i>digit</i>{@code +},
1276      *   where no <i>digit</i> sequence is longer than three characters and no
1277      *   sequence has a value larger than 255. </p></li>
1278      *
1279      *   <li><p> An IPv6 address enclosed in square brackets ({@code '['} and
1280      *   {@code ']'}) and consisting of hexadecimal digits, colon characters
1281      *   ({@code ':'}), and possibly an embedded IPv4 address.  The full
1282      *   syntax of IPv6 addresses is specified in <a
1283      *   href="http://www.ietf.org/rfc/rfc2373.txt"><i>RFC&nbsp;2373: IPv6
1284      *   Addressing Architecture</i></a>.  </p></li>
1285      *
1286      * </ul>
1287      *
1288      * The host component of a URI cannot contain escaped octets, hence this
1289      * method does not perform any decoding.
1290      *
1291      * @return  The host component of this URI,
1292      *          or {@code null} if the host is undefined
1293      */
1294     public String getHost() {
1295         return host;
1296     }
1297 
1298     /**
1299      * Returns the port number of this URI.
1300      *
1301      * <p> The port component of a URI, if defined, is a non-negative
1302      * integer. </p>
1303      *
1304      * @return  The port component of this URI,
1305      *          or {@code -1} if the port is undefined
1306      */
1307     public int getPort() {
1308         return port;
1309     }
1310 
1311     /**
1312      * Returns the raw path component of this URI.
1313      *
1314      * <p> The path component of a URI, if defined, only contains the slash
1315      * character ({@code '/'}), the commercial-at character ({@code '@'}),
1316      * and characters in the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>,
1317      * and <i>other</i> categories. </p>
1318      *
1319      * @return  The path component of this URI,
1320      *          or {@code null} if the path is undefined
1321      */
1322     public String getRawPath() {
1323         return path;
1324     }
1325 
1326     /**
1327      * Returns the decoded path component of this URI.
1328      *
1329      * <p> The string returned by this method is equal to that returned by the
1330      * {@link #getRawPath() getRawPath} method except that all sequences of
1331      * escaped octets are <a href="#decode">decoded</a>.  </p>
1332      *
1333      * @return  The decoded path component of this URI,
1334      *          or {@code null} if the path is undefined
1335      */
1336     public String getPath() {
1337         String decoded = decodedPath;
1338         if ((decoded == null) && (path != null)) {
1339             decodedPath = decoded = decode(path);
1340         }
1341         return decoded;
1342     }
1343 
1344     /**
1345      * Returns the raw query component of this URI.
1346      *
1347      * <p> The query component of a URI, if defined, only contains legal URI
1348      * characters. </p>
1349      *
1350      * @return  The raw query component of this URI,
1351      *          or {@code null} if the query is undefined
1352      */
1353     public String getRawQuery() {
1354         return query;
1355     }
1356 
1357     /**
1358      * Returns the decoded query component of this URI.
1359      *
1360      * <p> The string returned by this method is equal to that returned by the
1361      * {@link #getRawQuery() getRawQuery} method except that all sequences of
1362      * escaped octets are <a href="#decode">decoded</a>.  </p>
1363      *
1364      * @return  The decoded query component of this URI,
1365      *          or {@code null} if the query is undefined
1366      */
1367     public String getQuery() {
1368         String decoded = decodedQuery;
1369         if ((decoded == null) && (query != null)) {
1370             decodedQuery = decoded = decode(query, false);
1371         }
1372         return decoded;
1373     }
1374 
1375     /**
1376      * Returns the raw fragment component of this URI.
1377      *
1378      * <p> The fragment component of a URI, if defined, only contains legal URI
1379      * characters. </p>
1380      *
1381      * @return  The raw fragment component of this URI,
1382      *          or {@code null} if the fragment is undefined
1383      */
1384     public String getRawFragment() {
1385         return fragment;
1386     }
1387 
1388     /**
1389      * Returns the decoded fragment component of this URI.
1390      *
1391      * <p> The string returned by this method is equal to that returned by the
1392      * {@link #getRawFragment() getRawFragment} method except that all
1393      * sequences of escaped octets are <a href="#decode">decoded</a>.  </p>
1394      *
1395      * @return  The decoded fragment component of this URI,
1396      *          or {@code null} if the fragment is undefined
1397      */
1398     public String getFragment() {
1399         String decoded = decodedFragment;
1400         if ((decoded == null) && (fragment != null)) {
1401             decodedFragment = decoded = decode(fragment, false);
1402         }
1403         return decoded;
1404     }
1405 
1406 
1407     // -- Equality, comparison, hash code, toString, and serialization --
1408 
1409     /**
1410      * Tests this URI for equality with another object.
1411      *
1412      * <p> If the given object is not a URI then this method immediately
1413      * returns {@code false}.
1414      *
1415      * <p> For two URIs to be considered equal requires that either both are
1416      * opaque or both are hierarchical.  Their schemes must either both be
1417      * undefined or else be equal without regard to case. Their fragments
1418      * must either both be undefined or else be equal.
1419      *
1420      * <p> For two opaque URIs to be considered equal, their scheme-specific
1421      * parts must be equal.
1422      *
1423      * <p> For two hierarchical URIs to be considered equal, their paths must
1424      * be equal and their queries must either both be undefined or else be
1425      * equal.  Their authorities must either both be undefined, or both be
1426      * registry-based, or both be server-based.  If their authorities are
1427      * defined and are registry-based, then they must be equal.  If their
1428      * authorities are defined and are server-based, then their hosts must be
1429      * equal without regard to case, their port numbers must be equal, and
1430      * their user-information components must be equal.
1431      *
1432      * <p> When testing the user-information, path, query, fragment, authority,
1433      * or scheme-specific parts of two URIs for equality, the raw forms rather
1434      * than the encoded forms of these components are compared and the
1435      * hexadecimal digits of escaped octets are compared without regard to
1436      * case.
1437      *
1438      * <p> This method satisfies the general contract of the {@link
1439      * java.lang.Object#equals(Object) Object.equals} method. </p>
1440      *
1441      * @param   ob   The object to which this object is to be compared
1442      *
1443      * @return  {@code true} if, and only if, the given object is a URI that
1444      *          is identical to this URI
1445      */
1446     public boolean equals(Object ob) {
1447         if (ob == this)
1448             return true;
1449         if (!(ob instanceof URI))
1450             return false;
1451         URI that = (URI)ob;
1452         if (this.isOpaque() != that.isOpaque()) return false;
1453         if (!equalIgnoringCase(this.scheme, that.scheme)) return false;
1454         if (!equal(this.fragment, that.fragment)) return false;
1455 
1456         // Opaque
1457         if (this.isOpaque())
1458             return equal(this.schemeSpecificPart, that.schemeSpecificPart);
1459 
1460         // Hierarchical
1461         if (!equal(this.path, that.path)) return false;
1462         if (!equal(this.query, that.query)) return false;
1463 
1464         // Authorities
1465         if (this.authority == that.authority) return true;
1466         if (this.host != null) {
1467             // Server-based
1468             if (!equal(this.userInfo, that.userInfo)) return false;
1469             if (!equalIgnoringCase(this.host, that.host)) return false;
1470             if (this.port != that.port) return false;
1471         } else if (this.authority != null) {
1472             // Registry-based
1473             if (!equal(this.authority, that.authority)) return false;
1474         } else if (this.authority != that.authority) {
1475             return false;
1476         }
1477 
1478         return true;
1479     }
1480 
1481     /**
1482      * Returns a hash-code value for this URI.  The hash code is based upon all
1483      * of the URI's components, and satisfies the general contract of the
1484      * {@link java.lang.Object#hashCode() Object.hashCode} method.
1485      *
1486      * @return  A hash-code value for this URI
1487      */
1488     public int hashCode() {
1489         int h = hash;
1490         if (h == 0) {
1491             h = hashIgnoringCase(0, scheme);
1492             h = hash(h, fragment);
1493             if (isOpaque()) {
1494                 h = hash(h, schemeSpecificPart);
1495             } else {
1496                 h = hash(h, path);
1497                 h = hash(h, query);
1498                 if (host != null) {
1499                     h = hash(h, userInfo);
1500                     h = hashIgnoringCase(h, host);
1501                     h += 1949 * port;
1502                 } else {
1503                     h = hash(h, authority);
1504                 }
1505             }
1506             if (h != 0) {
1507                 hash = h;
1508             }
1509         }
1510         return h;
1511     }
1512 
1513     /**
1514      * Compares this URI to another object, which must be a URI.
1515      *
1516      * <p> When comparing corresponding components of two URIs, if one
1517      * component is undefined but the other is defined then the first is
1518      * considered to be less than the second.  Unless otherwise noted, string
1519      * components are ordered according to their natural, case-sensitive
1520      * ordering as defined by the {@link java.lang.String#compareTo(Object)
1521      * String.compareTo} method.  String components that are subject to
1522      * encoding are compared by comparing their raw forms rather than their
1523      * encoded forms.
1524      *
1525      * <p> The ordering of URIs is defined as follows: </p>
1526      *
1527      * <ul>
1528      *
     *   <li><p> Two URIs with different schemes are ordered according to the
     *   ordering of their schemes, without regard to case. </p></li>
1531      *
1532      *   <li><p> A hierarchical URI is considered to be less than an opaque URI
1533      *   with an identical scheme. </p></li>
1534      *
1535      *   <li><p> Two opaque URIs with identical schemes are ordered according
1536      *   to the ordering of their scheme-specific parts. </p></li>
1537      *
1538      *   <li><p> Two opaque URIs with identical schemes and scheme-specific
1539      *   parts are ordered according to the ordering of their
1540      *   fragments. </p></li>
1541      *
1542      *   <li><p> Two hierarchical URIs with identical schemes are ordered
1543      *   according to the ordering of their authority components: </p>
1544      *
1545      *   <ul>
1546      *
1547      *     <li><p> If both authority components are server-based then the URIs
1548      *     are ordered according to their user-information components; if these
1549      *     components are identical then the URIs are ordered according to the
1550      *     ordering of their hosts, without regard to case; if the hosts are
1551      *     identical then the URIs are ordered according to the ordering of
1552      *     their ports. </p></li>
1553      *
1554      *     <li><p> If one or both authority components are registry-based then
1555      *     the URIs are ordered according to the ordering of their authority
1556      *     components. </p></li>
1557      *
1558      *   </ul></li>
1559      *
1560      *   <li><p> Finally, two hierarchical URIs with identical schemes and
1561      *   authority components are ordered according to the ordering of their
1562      *   paths; if their paths are identical then they are ordered according to
1563      *   the ordering of their queries; if the queries are identical then they
1564      *   are ordered according to the order of their fragments. </p></li>
1565      *
1566      * </ul>
1567      *
1568      * <p> This method satisfies the general contract of the {@link
1569      * java.lang.Comparable#compareTo(Object) Comparable.compareTo}
1570      * method. </p>
1571      *
1572      * @param   that
1573      *          The object to which this URI is to be compared
1574      *
1575      * @return  A negative integer, zero, or a positive integer as this URI is
1576      *          less than, equal to, or greater than the given URI
1577      *
1578      * @throws  ClassCastException
1579      *          If the given object is not a URI
1580      */
1581     public int compareTo(URI that) {
1582         int c;
1583 
1584         if ((c = compareIgnoringCase(this.scheme, that.scheme)) != 0)
1585             return c;
1586 
1587         if (this.isOpaque()) {
1588             if (that.isOpaque()) {
1589                 // Both opaque
1590                 if ((c = compare(this.schemeSpecificPart,
1591                                  that.schemeSpecificPart)) != 0)
1592                     return c;
1593                 return compare(this.fragment, that.fragment);
1594             }
1595             return +1;                  // Opaque > hierarchical
1596         } else if (that.isOpaque()) {
1597             return -1;                  // Hierarchical < opaque
1598         }
1599 
1600         // Hierarchical
1601         if ((this.host != null) && (that.host != null)) {
1602             // Both server-based
1603             if ((c = compare(this.userInfo, that.userInfo)) != 0)
1604                 return c;
1605             if ((c = compareIgnoringCase(this.host, that.host)) != 0)
1606                 return c;
1607             if ((c = this.port - that.port) != 0)
1608                 return c;
1609         } else {
1610             // If one or both authorities are registry-based then we simply
1611             // compare them in the usual, case-sensitive way.  If one is
1612             // registry-based and one is server-based then the strings are
1613             // guaranteed to be unequal, hence the comparison will never return
1614             // zero and the compareTo and equals methods will remain
1615             // consistent.
1616             if ((c = compare(this.authority, that.authority)) != 0) return c;
1617         }
1618 
1619         if ((c = compare(this.path, that.path)) != 0) return c;
1620         if ((c = compare(this.query, that.query)) != 0) return c;
1621         return compare(this.fragment, that.fragment);
1622     }
1623 
1624     /**
1625      * Returns the content of this URI as a string.
1626      *
1627      * <p> If this URI was created by invoking one of the constructors in this
1628      * class then a string equivalent to the original input string, or to the
1629      * string computed from the originally-given components, as appropriate, is
1630      * returned.  Otherwise this URI was created by normalization, resolution,
1631      * or relativization, and so a string is constructed from this URI's
1632      * components according to the rules specified in <a
1633      * href="http://www.ietf.org/rfc/rfc2396.txt">RFC&nbsp;2396</a>,
1634      * section&nbsp;5.2, step&nbsp;7. </p>
1635      *
1636      * @return  The string form of this URI
1637      */
1638     public String toString() {
1639         String s = string;
1640         if (s == null) {
1641             s = defineString();
1642         }
1643         return s;
1644     }
1645 
1646     private String defineString() {
1647         String s = string;
1648         if (s != null) {
1649             return s;
1650         }
1651 
1652         StringBuilder sb = new StringBuilder();
1653         if (scheme != null) {
1654             sb.append(scheme);
1655             sb.append(':');
1656         }
1657         if (isOpaque()) {
1658             sb.append(schemeSpecificPart);
1659         } else {
1660             if (host != null) {
1661                 sb.append("//");
1662                 if (userInfo != null) {
1663                     sb.append(userInfo);
1664                     sb.append('@');
1665                 }
1666                 boolean needBrackets = ((host.indexOf(':') >= 0)
1667                         && !host.startsWith("[")
1668                         && !host.endsWith("]"));
1669                 if (needBrackets) sb.append('[');
1670                 sb.append(host);
1671                 if (needBrackets) sb.append(']');
1672                 if (port != -1) {
1673                     sb.append(':');
1674                     sb.append(port);
1675                 }
1676             } else if (authority != null) {
1677                 sb.append("//");
1678                 sb.append(authority);
1679             }
1680             if (path != null)
1681                 sb.append(path);
1682             if (query != null) {
1683                 sb.append('?');
1684                 sb.append(query);
1685             }
1686         }
1687         if (fragment != null) {
1688             sb.append('#');
1689             sb.append(fragment);
1690         }
1691         return string = sb.toString();
1692     }
1693 
1694     /**
1695      * Returns the content of this URI as a US-ASCII string.
1696      *
1697      * <p> If this URI does not contain any characters in the <i>other</i>
1698      * category then an invocation of this method will return the same value as
1699      * an invocation of the {@link #toString() toString} method.  Otherwise
1700      * this method works as if by invoking that method and then <a
1701      * href="#encode">encoding</a> the result.  </p>
1702      *
1703      * @return  The string form of this URI, encoded as needed
1704      *          so that it only contains characters in the US-ASCII
1705      *          charset
1706      */
1707     public String toASCIIString() {
1708         return encode(toString());
1709     }
1710 
1711 
1712     // -- Serialization support --
1713 
1714     /**
1715      * Saves the content of this URI to the given serial stream.
1716      *
1717      * <p> The only serializable field of a URI instance is its {@code string}
1718      * field.  That field is given a value, if it does not have one already,
1719      * and then the {@link java.io.ObjectOutputStream#defaultWriteObject()}
1720      * method of the given object-output stream is invoked. </p>
1721      *
1722      * @param  os  The object-output stream to which this object
1723      *             is to be written
1724      */
1725     private void writeObject(ObjectOutputStream os)
1726         throws IOException
1727     {
1728         defineString();
1729         os.defaultWriteObject();        // Writes the string field only
1730     }
1731 
1732     /**
1733      * Reconstitutes a URI from the given serial stream.
1734      *
1735      * <p> The {@link java.io.ObjectInputStream#defaultReadObject()} method is
1736      * invoked to read the value of the {@code string} field.  The result is
1737      * then parsed in the usual way.
1738      *
1739      * @param  is  The object-input stream from which this object
1740      *             is being read
1741      */
1742     private void readObject(ObjectInputStream is)
1743         throws ClassNotFoundException, IOException
1744     {
1745         port = -1;                      // Argh
1746         is.defaultReadObject();
1747         try {
1748             new Parser(string).parse(false);
1749         } catch (URISyntaxException x) {
1750             IOException y = new InvalidObjectException("Invalid URI");
1751             y.initCause(x);
1752             throw y;
1753         }
1754     }
1755 
1756 
1757     // -- End of public methods --
1758 
1759 
1760     // -- Utility methods for string-field comparison and hashing --
1761 
1762     // These methods return appropriate values for null string arguments,
1763     // thereby simplifying the equals, hashCode, and compareTo methods.
1764     //
1765     // The case-ignoring methods should only be applied to strings whose
1766     // characters are all known to be US-ASCII.  Because of this restriction,
1767     // these methods are faster than the similar methods in the String class.
1768 
1769     // US-ASCII only
1770     private static int toLower(char c) {
1771         if ((c >= 'A') && (c <= 'Z'))
1772             return c + ('a' - 'A');
1773         return c;
1774     }
1775 
1776     // US-ASCII only
1777     private static int toUpper(char c) {
1778         if ((c >= 'a') && (c <= 'z'))
1779             return c - ('a' - 'A');
1780         return c;
1781     }
1782 
1783     private static boolean equal(String s, String t) {
1784         if (s == t) return true;
1785         if ((s != null) && (t != null)) {
1786             if (s.length() != t.length())
1787                 return false;
1788             if (s.indexOf('%') < 0)
1789                 return s.equals(t);
1790             int n = s.length();
1791             for (int i = 0; i < n;) {
1792                 char c = s.charAt(i);
1793                 char d = t.charAt(i);
1794                 if (c != '%') {
1795                     if (c != d)
1796                         return false;
1797                     i++;
1798                     continue;
1799                 }
1800                 if (d != '%')
1801                     return false;
1802                 i++;
1803                 if (toLower(s.charAt(i)) != toLower(t.charAt(i)))
1804                     return false;
1805                 i++;
1806                 if (toLower(s.charAt(i)) != toLower(t.charAt(i)))
1807                     return false;
1808                 i++;
1809             }
1810             return true;
1811         }
1812         return false;
1813     }
1814 
1815     // US-ASCII only
1816     private static boolean equalIgnoringCase(String s, String t) {
1817         if (s == t) return true;
1818         if ((s != null) && (t != null)) {
1819             int n = s.length();
1820             if (t.length() != n)
1821                 return false;
1822             for (int i = 0; i < n; i++) {
1823                 if (toLower(s.charAt(i)) != toLower(t.charAt(i)))
1824                     return false;
1825             }
1826             return true;
1827         }
1828         return false;
1829     }
1830 
1831     private static int hash(int hash, String s) {
1832         if (s == null) return hash;
1833         return s.indexOf('%') < 0 ? hash * 127 + s.hashCode()
1834                                   : normalizedHash(hash, s);
1835     }
1836 
1837 
1838     private static int normalizedHash(int hash, String s) {
1839         int h = 0;
1840         for (int index = 0; index < s.length(); index++) {
1841             char ch = s.charAt(index);
1842             h = 31 * h + ch;
1843             if (ch == '%') {
1844                 /*
1845                  * Process the next two encoded characters
1846                  */
1847                 for (int i = index + 1; i < index + 3; i++)
1848                     h = 31 * h + toUpper(s.charAt(i));
1849                 index += 2;
1850             }
1851         }
1852         return hash * 127 + h;
1853     }
1854 
1855     // US-ASCII only
1856     private static int hashIgnoringCase(int hash, String s) {
1857         if (s == null) return hash;
1858         int h = hash;
1859         int n = s.length();
1860         for (int i = 0; i < n; i++)
1861             h = 31 * h + toLower(s.charAt(i));
1862         return h;
1863     }
1864 
1865     private static int compare(String s, String t) {
1866         if (s == t) return 0;
1867         if (s != null) {
1868             if (t != null)
1869                 return s.compareTo(t);
1870             else
1871                 return +1;
1872         } else {
1873             return -1;
1874         }
1875     }
1876 
1877     // US-ASCII only
1878     private static int compareIgnoringCase(String s, String t) {
1879         if (s == t) return 0;
1880         if (s != null) {
1881             if (t != null) {
1882                 int sn = s.length();
1883                 int tn = t.length();
1884                 int n = sn < tn ? sn : tn;
1885                 for (int i = 0; i < n; i++) {
1886                     int c = toLower(s.charAt(i)) - toLower(t.charAt(i));
1887                     if (c != 0)
1888                         return c;
1889                 }
1890                 return sn - tn;
1891             }
1892             return +1;
1893         } else {
1894             return -1;
1895         }
1896     }
1897 
1898 
1899     // -- String construction --
1900 
1901     // If a scheme is given then the path, if given, must be absolute
1902     //
1903     private static void checkPath(String s, String scheme, String path)
1904         throws URISyntaxException
1905     {
1906         if (scheme != null) {
1907             if ((path != null)
1908                 && ((path.length() > 0) && (path.charAt(0) != '/')))
1909                 throw new URISyntaxException(s,
1910                                              "Relative path in absolute URI");
1911         }
1912     }
1913 
1914     private void appendAuthority(StringBuilder sb,
1915                                  String authority,
1916                                  String userInfo,
1917                                  String host,
1918                                  int port)
1919     {
1920         if (host != null) {
1921             sb.append("//");
1922             if (userInfo != null) {
1923                 sb.append(quote(userInfo, L_USERINFO, H_USERINFO));
1924                 sb.append('@');
1925             }
1926             boolean needBrackets = ((host.indexOf(':') >= 0)
1927                                     && !host.startsWith("[")
1928                                     && !host.endsWith("]"));
1929             if (needBrackets) sb.append('[');
1930             sb.append(host);
1931             if (needBrackets) sb.append(']');
1932             if (port != -1) {
1933                 sb.append(':');
1934                 sb.append(port);
1935             }
1936         } else if (authority != null) {
1937             sb.append("//");
1938             if (authority.startsWith("[")) {
1939                 // authority should (but may not) contain an embedded IPv6 address
1940                 int end = authority.indexOf(']');
1941                 String doquote = authority, dontquote = "";
1942                 if (end != -1 && authority.indexOf(':') != -1) {
1943                     // the authority contains an IPv6 address
1944                     if (end == authority.length()) {
1945                         dontquote = authority;
1946                         doquote = "";
1947                     } else {
1948                         dontquote = authority.substring(0 , end + 1);
1949                         doquote = authority.substring(end + 1);
1950                     }
1951                 }
1952                 sb.append(dontquote);
1953                 sb.append(quote(doquote,
1954                             L_REG_NAME | L_SERVER,
1955                             H_REG_NAME | H_SERVER));
1956             } else {
1957                 sb.append(quote(authority,
1958                             L_REG_NAME | L_SERVER,
1959                             H_REG_NAME | H_SERVER));
1960             }
1961         }
1962     }
1963 
1964     private void appendSchemeSpecificPart(StringBuilder sb,
1965                                           String opaquePart,
1966                                           String authority,
1967                                           String userInfo,
1968                                           String host,
1969                                           int port,
1970                                           String path,
1971                                           String query)
1972     {
1973         if (opaquePart != null) {
1974             /* check if SSP begins with an IPv6 address
1975              * because we must not quote a literal IPv6 address
1976              */
1977             if (opaquePart.startsWith("//[")) {
1978                 int end =  opaquePart.indexOf(']');
1979                 if (end != -1 && opaquePart.indexOf(':')!=-1) {
1980                     String doquote, dontquote;
1981                     if (end == opaquePart.length()) {
1982                         dontquote = opaquePart;
1983                         doquote = "";
1984                     } else {
1985                         dontquote = opaquePart.substring(0,end+1);
1986                         doquote = opaquePart.substring(end+1);
1987                     }
1988                     sb.append (dontquote);
1989                     sb.append(quote(doquote, L_URIC, H_URIC));
1990                 }
1991             } else {
1992                 sb.append(quote(opaquePart, L_URIC, H_URIC));
1993             }
1994         } else {
1995             appendAuthority(sb, authority, userInfo, host, port);
1996             if (path != null)
1997                 sb.append(quote(path, L_PATH, H_PATH));
1998             if (query != null) {
1999                 sb.append('?');
2000                 sb.append(quote(query, L_URIC, H_URIC));
2001             }
2002         }
2003     }
2004 
2005     private void appendFragment(StringBuilder sb, String fragment) {
2006         if (fragment != null) {
2007             sb.append('#');
2008             sb.append(quote(fragment, L_URIC, H_URIC));
2009         }
2010     }
2011 
2012     private String toString(String scheme,
2013                             String opaquePart,
2014                             String authority,
2015                             String userInfo,
2016                             String host,
2017                             int port,
2018                             String path,
2019                             String query,
2020                             String fragment)
2021     {
2022         StringBuilder sb = new StringBuilder();
2023         if (scheme != null) {
2024             sb.append(scheme);
2025             sb.append(':');
2026         }
2027         appendSchemeSpecificPart(sb, opaquePart,
2028                                  authority, userInfo, host, port,
2029                                  path, query);
2030         appendFragment(sb, fragment);
2031         return sb.toString();
2032     }
2033 
2034     // -- Normalization, resolution, and relativization --
2035 
2036     // RFC2396 5.2 (6)
2037     private static String resolvePath(String base, String child,
2038                                       boolean absolute)
2039     {
2040         int i = base.lastIndexOf('/');
2041         int cn = child.length();
2042         String path = "";
2043 
2044         if (cn == 0) {
2045             // 5.2 (6a)
2046             if (i >= 0)
2047                 path = base.substring(0, i + 1);
2048         } else {
2049             StringBuilder sb = new StringBuilder(base.length() + cn);
2050             // 5.2 (6a)
2051             if (i >= 0)
2052                 sb.append(base, 0, i + 1);
2053             // 5.2 (6b)
2054             sb.append(child);
2055             path = sb.toString();
2056         }
2057 
2058         // 5.2 (6c-f)
2059         String np = normalize(path);
2060 
2061         // 5.2 (6g): If the result is absolute but the path begins with "../",
2062         // then we simply leave the path as-is
2063 
2064         return np;
2065     }
2066 
2067     // RFC2396 5.2
2068     private static URI resolve(URI base, URI child) {
2069         // check if child if opaque first so that NPE is thrown
2070         // if child is null.
2071         if (child.isOpaque() || base.isOpaque())
2072             return child;
2073 
2074         // 5.2 (2): Reference to current document (lone fragment)
2075         if ((child.scheme == null) && (child.authority == null)
2076             && child.path.isEmpty() && (child.fragment != null)
2077             && (child.query == null)) {
2078             if ((base.fragment != null)
2079                 && child.fragment.equals(base.fragment)) {
2080                 return base;
2081             }
2082             URI ru = new URI();
2083             ru.scheme = base.scheme;
2084             ru.authority = base.authority;
2085             ru.userInfo = base.userInfo;
2086             ru.host = base.host;
2087             ru.port = base.port;
2088             ru.path = base.path;
2089             ru.fragment = child.fragment;
2090             ru.query = base.query;
2091             return ru;
2092         }
2093 
2094         // 5.2 (3): Child is absolute
2095         if (child.scheme != null)
2096             return child;
2097 
2098         URI ru = new URI();             // Resolved URI
2099         ru.scheme = base.scheme;
2100         ru.query = child.query;
2101         ru.fragment = child.fragment;
2102 
2103         // 5.2 (4): Authority
2104         if (child.authority == null) {
2105             ru.authority = base.authority;
2106             ru.host = base.host;
2107             ru.userInfo = base.userInfo;
2108             ru.port = base.port;
2109 
2110             String cp = (child.path == null) ? "" : child.path;
2111             if ((cp.length() > 0) && (cp.charAt(0) == '/')) {
2112                 // 5.2 (5): Child path is absolute
2113                 ru.path = child.path;
2114             } else {
2115                 // 5.2 (6): Resolve relative path
2116                 ru.path = resolvePath(base.path, cp, base.isAbsolute());
2117             }
2118         } else {
2119             ru.authority = child.authority;
2120             ru.host = child.host;
2121             ru.userInfo = child.userInfo;
2122             ru.host = child.host;
2123             ru.port = child.port;
2124             ru.path = child.path;
2125         }
2126 
2127         // 5.2 (7): Recombine (nothing to do here)
2128         return ru;
2129     }
2130 
2131     // If the given URI's path is normal then return the URI;
2132     // o.w., return a new URI containing the normalized path.
2133     //
2134     private static URI normalize(URI u) {
2135         if (u.isOpaque() || (u.path == null) || (u.path.length() == 0))
2136             return u;
2137 
2138         String np = normalize(u.path);
2139         if (np == u.path)
2140             return u;
2141 
2142         URI v = new URI();
2143         v.scheme = u.scheme;
2144         v.fragment = u.fragment;
2145         v.authority = u.authority;
2146         v.userInfo = u.userInfo;
2147         v.host = u.host;
2148         v.port = u.port;
2149         v.path = np;
2150         v.query = u.query;
2151         return v;
2152     }
2153 
2154     // If both URIs are hierarchical, their scheme and authority components are
2155     // identical, and the base path is a prefix of the child's path, then
2156     // return a relative URI that, when resolved against the base, yields the
2157     // child; otherwise, return the child.
2158     //
2159     private static URI relativize(URI base, URI child) {
2160         // check if child if opaque first so that NPE is thrown
2161         // if child is null.
2162         if (child.isOpaque() || base.isOpaque())
2163             return child;
2164         if (!equalIgnoringCase(base.scheme, child.scheme)
2165             || !equal(base.authority, child.authority))
2166             return child;
2167 
2168         String bp = normalize(base.path);
2169         String cp = normalize(child.path);
2170         if (!bp.equals(cp)) {
2171             if (!bp.endsWith("/"))
2172                 bp = bp + "/";
2173             if (!cp.startsWith(bp))
2174                 return child;
2175         }
2176 
2177         URI v = new URI();
2178         v.path = cp.substring(bp.length());
2179         v.query = child.query;
2180         v.fragment = child.fragment;
2181         return v;
2182     }
2183 
2184 
2185 
2186     // -- Path normalization --
2187 
2188     // The following algorithm for path normalization avoids the creation of a
2189     // string object for each segment, as well as the use of a string buffer to
2190     // compute the final result, by using a single char array and editing it in
2191     // place.  The array is first split into segments, replacing each slash
2192     // with '\0' and creating a segment-index array, each element of which is
2193     // the index of the first char in the corresponding segment.  We then walk
2194     // through both arrays, removing ".", "..", and other segments as necessary
2195     // by setting their entries in the index array to -1.  Finally, the two
2196     // arrays are used to rejoin the segments and compute the final result.
2197     //
2198     // This code is based upon src/solaris/native/java/io/canonicalize_md.c
2199 
2200 
2201     // Check the given path to see if it might need normalization.  A path
2202     // might need normalization if it contains duplicate slashes, a "."
2203     // segment, or a ".." segment.  Return -1 if no further normalization is
2204     // possible, otherwise return the number of segments found.
2205     //
2206     // This method takes a string argument rather than a char array so that
2207     // this test can be performed without invoking path.toCharArray().
2208     //
2209     private static int needsNormalization(String path) {
2210         boolean normal = true;
2211         int ns = 0;                     // Number of segments
2212         int end = path.length() - 1;    // Index of last char in path
2213         int p = 0;                      // Index of next char in path
2214 
2215         // Skip initial slashes
2216         while (p <= end) {
2217             if (path.charAt(p) != '/') break;
2218             p++;
2219         }
2220         if (p > 1) normal = false;
2221 
2222         // Scan segments
2223         while (p <= end) {
2224 
2225             // Looking at "." or ".." ?
2226             if ((path.charAt(p) == '.')
2227                 && ((p == end)
2228                     || ((path.charAt(p + 1) == '/')
2229                         || ((path.charAt(p + 1) == '.')
2230                             && ((p + 1 == end)
2231                                 || (path.charAt(p + 2) == '/')))))) {
2232                 normal = false;
2233             }
2234             ns++;
2235 
2236             // Find beginning of next segment
2237             while (p <= end) {
2238                 if (path.charAt(p++) != '/')
2239                     continue;
2240 
2241                 // Skip redundant slashes
2242                 while (p <= end) {
2243                     if (path.charAt(p) != '/') break;
2244                     normal = false;
2245                     p++;
2246                 }
2247 
2248                 break;
2249             }
2250         }
2251 
2252         return normal ? -1 : ns;
2253     }
2254 
2255 
2256     // Split the given path into segments, replacing slashes with nulls and
2257     // filling in the given segment-index array.
2258     //
2259     // Preconditions:
2260     //   segs.length == Number of segments in path
2261     //
2262     // Postconditions:
2263     //   All slashes in path replaced by '\0'
2264     //   segs[i] == Index of first char in segment i (0 <= i < segs.length)
2265     //
2266     private static void split(char[] path, int[] segs) {
2267         int end = path.length - 1;      // Index of last char in path
2268         int p = 0;                      // Index of next char in path
2269         int i = 0;                      // Index of current segment
2270 
2271         // Skip initial slashes
2272         while (p <= end) {
2273             if (path[p] != '/') break;
2274             path[p] = '\0';
2275             p++;
2276         }
2277 
2278         while (p <= end) {
2279 
2280             // Note start of segment
2281             segs[i++] = p++;
2282 
2283             // Find beginning of next segment
2284             while (p <= end) {
2285                 if (path[p++] != '/')
2286                     continue;
2287                 path[p - 1] = '\0';
2288 
2289                 // Skip redundant slashes
2290                 while (p <= end) {
2291                     if (path[p] != '/') break;
2292                     path[p++] = '\0';
2293                 }
2294                 break;
2295             }
2296         }
2297 
2298         if (i != segs.length)
2299             throw new InternalError();  // ASSERT
2300     }
2301 
2302 
2303     // Join the segments in the given path according to the given segment-index
2304     // array, ignoring those segments whose index entries have been set to -1,
2305     // and inserting slashes as needed.  Return the length of the resulting
2306     // path.
2307     //
2308     // Preconditions:
2309     //   segs[i] == -1 implies segment i is to be ignored
2310     //   path computed by split, as above, with '\0' having replaced '/'
2311     //
2312     // Postconditions:
2313     //   path[0] .. path[return value] == Resulting path
2314     //
2315     private static int join(char[] path, int[] segs) {
2316         int ns = segs.length;           // Number of segments
2317         int end = path.length - 1;      // Index of last char in path
2318         int p = 0;                      // Index of next path char to write
2319 
2320         if (path[p] == '\0') {
2321             // Restore initial slash for absolute paths
2322             path[p++] = '/';
2323         }
2324 
2325         for (int i = 0; i < ns; i++) {
2326             int q = segs[i];            // Current segment
2327             if (q == -1)
2328                 // Ignore this segment
2329                 continue;
2330 
2331             if (p == q) {
2332                 // We're already at this segment, so just skip to its end
2333                 while ((p <= end) && (path[p] != '\0'))
2334                     p++;
2335                 if (p <= end) {
2336                     // Preserve trailing slash
2337                     path[p++] = '/';
2338                 }
2339             } else if (p < q) {
2340                 // Copy q down to p
2341                 while ((q <= end) && (path[q] != '\0'))
2342                     path[p++] = path[q++];
2343                 if (q <= end) {
2344                     // Preserve trailing slash
2345                     path[p++] = '/';
2346                 }
2347             } else
2348                 throw new InternalError(); // ASSERT false
2349         }
2350 
2351         return p;
2352     }
2353 
2354 
2355     // Remove "." segments from the given path, and remove segment pairs
2356     // consisting of a non-".." segment followed by a ".." segment.
2357     //
2358     private static void removeDots(char[] path, int[] segs) {
2359         int ns = segs.length;
2360         int end = path.length - 1;
2361 
2362         for (int i = 0; i < ns; i++) {
2363             int dots = 0;               // Number of dots found (0, 1, or 2)
2364 
2365             // Find next occurrence of "." or ".."
2366             do {
2367                 int p = segs[i];
2368                 if (path[p] == '.') {
2369                     if (p == end) {
2370                         dots = 1;
2371                         break;
2372                     } else if (path[p + 1] == '\0') {
2373                         dots = 1;
2374                         break;
2375                     } else if ((path[p + 1] == '.')
2376                                && ((p + 1 == end)
2377                                    || (path[p + 2] == '\0'))) {
2378                         dots = 2;
2379                         break;
2380                     }
2381                 }
2382                 i++;
2383             } while (i < ns);
2384             if ((i > ns) || (dots == 0))
2385                 break;
2386 
2387             if (dots == 1) {
2388                 // Remove this occurrence of "."
2389                 segs[i] = -1;
2390             } else {
2391                 // If there is a preceding non-".." segment, remove both that
2392                 // segment and this occurrence of ".."; otherwise, leave this
2393                 // ".." segment as-is.
2394                 int j;
2395                 for (j = i - 1; j >= 0; j--) {
2396                     if (segs[j] != -1) break;
2397                 }
2398                 if (j >= 0) {
2399                     int q = segs[j];
2400                     if (!((path[q] == '.')
2401                           && (path[q + 1] == '.')
2402                           && (path[q + 2] == '\0'))) {
2403                         segs[i] = -1;
2404                         segs[j] = -1;
2405                     }
2406                 }
2407             }
2408         }
2409     }
2410 
2411 
2412     // DEVIATION: If the normalized path is relative, and if the first
2413     // segment could be parsed as a scheme name, then prepend a "." segment
2414     //
2415     private static void maybeAddLeadingDot(char[] path, int[] segs) {
2416 
2417         if (path[0] == '\0')
2418             // The path is absolute
2419             return;
2420 
2421         int ns = segs.length;
2422         int f = 0;                      // Index of first segment
2423         while (f < ns) {
2424             if (segs[f] >= 0)
2425                 break;
2426             f++;
2427         }
2428         if ((f >= ns) || (f == 0))
2429             // The path is empty, or else the original first segment survived,
2430             // in which case we already know that no leading "." is needed
2431             return;
2432 
2433         int p = segs[f];
2434         while ((p < path.length) && (path[p] != ':') && (path[p] != '\0')) p++;
2435         if (p >= path.length || path[p] == '\0')
2436             // No colon in first segment, so no "." needed
2437             return;
2438 
2439         // At this point we know that the first segment is unused,
2440         // hence we can insert a "." segment at that position
2441         path[0] = '.';
2442         path[1] = '\0';
2443         segs[0] = 0;
2444     }
2445 
2446 
2447     // Normalize the given path string.  A normal path string has no empty
2448     // segments (i.e., occurrences of "//"), no segments equal to ".", and no
2449     // segments equal to ".." that are preceded by a segment not equal to "..".
2450     // In contrast to Unix-style pathname normalization, for URI paths we
2451     // always retain trailing slashes.
2452     //
2453     private static String normalize(String ps) {
2454 
2455         // Does this path need normalization?
2456         int ns = needsNormalization(ps);        // Number of segments
2457         if (ns < 0)
2458             // Nope -- just return it
2459             return ps;
2460 
2461         char[] path = ps.toCharArray();         // Path in char-array form
2462 
2463         // Split path into segments
2464         int[] segs = new int[ns];               // Segment-index array
2465         split(path, segs);
2466 
2467         // Remove dots
2468         removeDots(path, segs);
2469 
2470         // Prevent scheme-name confusion
2471         maybeAddLeadingDot(path, segs);
2472 
2473         // Join the remaining segments and return the result
2474         String s = new String(path, 0, join(path, segs));
2475         if (s.equals(ps)) {
2476             // string was already normalized
2477             return ps;
2478         }
2479         return s;
2480     }
2481 
2482 
2483 
2484     // -- Character classes for parsing --
2485 
2486     // RFC2396 precisely specifies which characters in the US-ASCII charset are
2487     // permissible in the various components of a URI reference.  We here
2488     // define a set of mask pairs to aid in enforcing these restrictions.  Each
2489     // mask pair consists of two longs, a low mask and a high mask.  Taken
2490     // together they represent a 128-bit mask, where bit i is set iff the
2491     // character with value i is permitted.
2492     //
2493     // This approach is more efficient than sequentially searching arrays of
2494     // permitted characters.  It could be made still more efficient by
2495     // precompiling the mask information so that a character's presence in a
2496     // given mask could be determined by a single table lookup.
2497 
2498     // Compute the low-order mask for the characters in the given string
2499     private static long lowMask(String chars) {
2500         int n = chars.length();
2501         long m = 0;
2502         for (int i = 0; i < n; i++) {
2503             char c = chars.charAt(i);
2504             if (c < 64)
2505                 m |= (1L << c);
2506         }
2507         return m;
2508     }
2509 
2510     // Compute the high-order mask for the characters in the given string
2511     private static long highMask(String chars) {
2512         int n = chars.length();
2513         long m = 0;
2514         for (int i = 0; i < n; i++) {
2515             char c = chars.charAt(i);
2516             if ((c >= 64) && (c < 128))
2517                 m |= (1L << (c - 64));
2518         }
2519         return m;
2520     }
2521 
2522     // Compute a low-order mask for the characters
2523     // between first and last, inclusive
2524     private static long lowMask(char first, char last) {
2525         long m = 0;
2526         int f = Math.max(Math.min(first, 63), 0);
2527         int l = Math.max(Math.min(last, 63), 0);
2528         for (int i = f; i <= l; i++)
2529             m |= 1L << i;
2530         return m;
2531     }
2532 
2533     // Compute a high-order mask for the characters
2534     // between first and last, inclusive
2535     private static long highMask(char first, char last) {
2536         long m = 0;
2537         int f = Math.max(Math.min(first, 127), 64) - 64;
2538         int l = Math.max(Math.min(last, 127), 64) - 64;
2539         for (int i = f; i <= l; i++)
2540             m |= 1L << i;
2541         return m;
2542     }
2543 
2544     // Tell whether the given character is permitted by the given mask pair
2545     private static boolean match(char c, long lowMask, long highMask) {
2546         if (c == 0) // 0 doesn't have a slot in the mask. So, it never matches.
2547             return false;
2548         if (c < 64)
2549             return ((1L << c) & lowMask) != 0;
2550         if (c < 128)
2551             return ((1L << (c - 64)) & highMask) != 0;
2552         return false;
2553     }
2554 
2555     // Character-class masks, in reverse order from RFC2396 because
2556     // initializers for static fields cannot make forward references.
2557 
2558     // digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
2559     //            "8" | "9"
2560     private static final long L_DIGIT = lowMask('0', '9');
2561     private static final long H_DIGIT = 0L;
2562 
2563     // upalpha  = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
2564     //            "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
2565     //            "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
2566     private static final long L_UPALPHA = 0L;
2567     private static final long H_UPALPHA = highMask('A', 'Z');
2568 
2569     // lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |
2570     //            "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |
2571     //            "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
2572     private static final long L_LOWALPHA = 0L;
2573     private static final long H_LOWALPHA = highMask('a', 'z');
2574 
2575     // alpha         = lowalpha | upalpha
2576     private static final long L_ALPHA = L_LOWALPHA | L_UPALPHA;
2577     private static final long H_ALPHA = H_LOWALPHA | H_UPALPHA;
2578 
2579     // alphanum      = alpha | digit
2580     private static final long L_ALPHANUM = L_DIGIT | L_ALPHA;
2581     private static final long H_ALPHANUM = H_DIGIT | H_ALPHA;
2582 
2583     // hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
2584     //                         "a" | "b" | "c" | "d" | "e" | "f"
2585     private static final long L_HEX = L_DIGIT;
2586     private static final long H_HEX = highMask('A', 'F') | highMask('a', 'f');
2587 
2588     // mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
2589     //                 "(" | ")"
2590     private static final long L_MARK = lowMask("-_.!~*'()");
2591     private static final long H_MARK = highMask("-_.!~*'()");
2592 
2593     // unreserved    = alphanum | mark
2594     private static final long L_UNRESERVED = L_ALPHANUM | L_MARK;
2595     private static final long H_UNRESERVED = H_ALPHANUM | H_MARK;
2596 
2597     // reserved      = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
2598     //                 "$" | "," | "[" | "]"
2599     // Added per RFC2732: "[", "]"
2600     private static final long L_RESERVED = lowMask(";/?:@&=+$,[]");
2601     private static final long H_RESERVED = highMask(";/?:@&=+$,[]");
2602 
2603     // The zero'th bit is used to indicate that escape pairs and non-US-ASCII
2604     // characters are allowed; this is handled by the scanEscape method below.
2605     private static final long L_ESCAPED = 1L;
2606     private static final long H_ESCAPED = 0L;
2607 
2608     // uric          = reserved | unreserved | escaped
2609     private static final long L_URIC = L_RESERVED | L_UNRESERVED | L_ESCAPED;
2610     private static final long H_URIC = H_RESERVED | H_UNRESERVED | H_ESCAPED;
2611 
2612     // pchar         = unreserved | escaped |
2613     //                 ":" | "@" | "&" | "=" | "+" | "$" | ","
2614     private static final long L_PCHAR
2615         = L_UNRESERVED | L_ESCAPED | lowMask(":@&=+$,");
2616     private static final long H_PCHAR
2617         = H_UNRESERVED | H_ESCAPED | highMask(":@&=+$,");
2618 
2619     // All valid path characters
2620     private static final long L_PATH = L_PCHAR | lowMask(";/");
2621     private static final long H_PATH = H_PCHAR | highMask(";/");
2622 
2623     // Dash, for use in domainlabel and toplabel
2624     private static final long L_DASH = lowMask("-");
2625     private static final long H_DASH = highMask("-");
2626 
2627     // Dot, for use in hostnames
2628     private static final long L_DOT = lowMask(".");
2629     private static final long H_DOT = highMask(".");
2630 
2631     // userinfo      = *( unreserved | escaped |
2632     //                    ";" | ":" | "&" | "=" | "+" | "$" | "," )
2633     private static final long L_USERINFO
2634         = L_UNRESERVED | L_ESCAPED | lowMask(";:&=+$,");
2635     private static final long H_USERINFO
2636         = H_UNRESERVED | H_ESCAPED | highMask(";:&=+$,");
2637 
2638     // reg_name      = 1*( unreserved | escaped | "$" | "," |
2639     //                     ";" | ":" | "@" | "&" | "=" | "+" )
2640     private static final long L_REG_NAME
2641         = L_UNRESERVED | L_ESCAPED | lowMask("$,;:@&=+");
2642     private static final long H_REG_NAME
2643         = H_UNRESERVED | H_ESCAPED | highMask("$,;:@&=+");
2644 
2645     // All valid characters for server-based authorities
2646     private static final long L_SERVER
2647         = L_USERINFO | L_ALPHANUM | L_DASH | lowMask(".:@[]");
2648     private static final long H_SERVER
2649         = H_USERINFO | H_ALPHANUM | H_DASH | highMask(".:@[]");
2650 
2651     // Special case of server authority that represents an IPv6 address
2652     // In this case, a % does not signify an escape sequence
2653     private static final long L_SERVER_PERCENT
2654         = L_SERVER | lowMask("%");
2655     private static final long H_SERVER_PERCENT
2656         = H_SERVER | highMask("%");
2657     private static final long L_LEFT_BRACKET = lowMask("[");
2658     private static final long H_LEFT_BRACKET = highMask("[");
2659 
2660     // scheme        = alpha *( alpha | digit | "+" | "-" | "." )
2661     private static final long L_SCHEME = L_ALPHA | L_DIGIT | lowMask("+-.");
2662     private static final long H_SCHEME = H_ALPHA | H_DIGIT | highMask("+-.");
2663 
2664     // scope_id = alpha | digit | "_" | "."
2665     private static final long L_SCOPE_ID
2666         = L_ALPHANUM | lowMask("_.");
2667     private static final long H_SCOPE_ID
2668         = H_ALPHANUM | highMask("_.");
2669 
2670     // -- Escaping and encoding --
2671 
2672     private static final char[] hexDigits = {
2673         '0', '1', '2', '3', '4', '5', '6', '7',
2674         '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
2675     };
2676 
2677     private static void appendEscape(StringBuilder sb, byte b) {
2678         sb.append('%');
2679         sb.append(hexDigits[(b >> 4) & 0x0f]);
2680         sb.append(hexDigits[(b >> 0) & 0x0f]);
2681     }
2682 
2683     private static void appendEncoded(StringBuilder sb, char c) {
2684         ByteBuffer bb = null;
2685         try {
2686             bb = ThreadLocalCoders.encoderFor("UTF-8")
2687                 .encode(CharBuffer.wrap("" + c));
2688         } catch (CharacterCodingException x) {
2689             assert false;
2690         }
2691         while (bb.hasRemaining()) {
2692             int b = bb.get() & 0xff;
2693             if (b >= 0x80)
2694                 appendEscape(sb, (byte)b);
2695             else
2696                 sb.append((char)b);
2697         }
2698     }
2699 
2700     // Quote any characters in s that are not permitted
2701     // by the given mask pair
2702     //
2703     private static String quote(String s, long lowMask, long highMask) {
2704         StringBuilder sb = null;
2705         boolean allowNonASCII = ((lowMask & L_ESCAPED) != 0);
2706         for (int i = 0; i < s.length(); i++) {
2707             char c = s.charAt(i);
2708             if (c < '\u0080') {
2709                 if (!match(c, lowMask, highMask)) {
2710                     if (sb == null) {
2711                         sb = new StringBuilder();
2712                         sb.append(s, 0, i);
2713                     }
2714                     appendEscape(sb, (byte)c);
2715                 } else {
2716                     if (sb != null)
2717                         sb.append(c);
2718                 }
2719             } else if (allowNonASCII
2720                        && (Character.isSpaceChar(c)
2721                            || Character.isISOControl(c))) {
2722                 if (sb == null) {
2723                     sb = new StringBuilder();
2724                     sb.append(s, 0, i);
2725                 }
2726                 appendEncoded(sb, c);
2727             } else {
2728                 if (sb != null)
2729                     sb.append(c);
2730             }
2731         }
2732         return (sb == null) ? s : sb.toString();
2733     }
2734 
2735     // Encodes all characters >= \u0080 into escaped, normalized UTF-8 octets,
2736     // assuming that s is otherwise legal
2737     //
2738     private static String encode(String s) {
2739         int n = s.length();
2740         if (n == 0)
2741             return s;
2742 
2743         // First check whether we actually need to encode
2744         for (int i = 0;;) {
2745             if (s.charAt(i) >= '\u0080')
2746                 break;
2747             if (++i >= n)
2748                 return s;
2749         }
2750 
2751         String ns = Normalizer.normalize(s, Normalizer.Form.NFC);
2752         ByteBuffer bb = null;
2753         try {
2754             bb = ThreadLocalCoders.encoderFor("UTF-8")
2755                 .encode(CharBuffer.wrap(ns));
2756         } catch (CharacterCodingException x) {
2757             assert false;
2758         }
2759 
2760         StringBuilder sb = new StringBuilder();
2761         while (bb.hasRemaining()) {
2762             int b = bb.get() & 0xff;
2763             if (b >= 0x80)
2764                 appendEscape(sb, (byte)b);
2765             else
2766                 sb.append((char)b);
2767         }
2768         return sb.toString();
2769     }
2770 
2771     private static int decode(char c) {
2772         if ((c >= '0') && (c <= '9'))
2773             return c - '0';
2774         if ((c >= 'a') && (c <= 'f'))
2775             return c - 'a' + 10;
2776         if ((c >= 'A') && (c <= 'F'))
2777             return c - 'A' + 10;
2778         assert false;
2779         return -1;
2780     }
2781 
2782     private static byte decode(char c1, char c2) {
2783         return (byte)(  ((decode(c1) & 0xf) << 4)
2784                       | ((decode(c2) & 0xf) << 0));
2785     }
2786 
2787     // Evaluates all escapes in s, applying UTF-8 decoding if needed.  Assumes
2788     // that escapes are well-formed syntactically, i.e., of the form %XX.  If a
2789     // sequence of escaped octets is not valid UTF-8 then the erroneous octets
2790     // are replaced with '\uFFFD'.
2791     // Exception: any "%" found between "[]" is left alone. It is an IPv6 literal
2792     //            with a scope_id
2793     //
2794     private static String decode(String s) {
2795         return decode(s, true);
2796     }
2797 
2798     // This method was introduced as a generalization of URI.decode method
2799     // to provide a fix for JDK-8037396
2800     private static String decode(String s, boolean ignorePercentInBrackets) {
2801         if (s == null)
2802             return s;
2803         int n = s.length();
2804         if (n == 0)
2805             return s;
2806         if (s.indexOf('%') < 0)
2807             return s;
2808 
2809         StringBuilder sb = new StringBuilder(n);
2810         ByteBuffer bb = ByteBuffer.allocate(n);
2811         CharBuffer cb = CharBuffer.allocate(n);
2812         CharsetDecoder dec = ThreadLocalCoders.decoderFor("UTF-8")
2813                 .onMalformedInput(CodingErrorAction.REPLACE)
2814                 .onUnmappableCharacter(CodingErrorAction.REPLACE);
2815 
2816         // This is not horribly efficient, but it will do for now
2817         char c = s.charAt(0);
2818         boolean betweenBrackets = false;
2819 
2820         for (int i = 0; i < n;) {
2821             assert c == s.charAt(i);    // Loop invariant
2822             if (c == '[') {
2823                 betweenBrackets = true;
2824             } else if (betweenBrackets && c == ']') {
2825                 betweenBrackets = false;
2826             }
2827             if (c != '%' || (betweenBrackets && ignorePercentInBrackets)) {
2828                 sb.append(c);
2829                 if (++i >= n)
2830                     break;
2831                 c = s.charAt(i);
2832                 continue;
2833             }
2834             bb.clear();
2835             int ui = i;
2836             for (;;) {
2837                 assert (n - i >= 2);
2838                 bb.put(decode(s.charAt(++i), s.charAt(++i)));
2839                 if (++i >= n)
2840                     break;
2841                 c = s.charAt(i);
2842                 if (c != '%')
2843                     break;
2844             }
2845             bb.flip();
2846             cb.clear();
2847             dec.reset();
2848             CoderResult cr = dec.decode(bb, cb, true);
2849             assert cr.isUnderflow();
2850             cr = dec.flush(cb);
2851             assert cr.isUnderflow();
2852             sb.append(cb.flip().toString());
2853         }
2854 
2855         return sb.toString();
2856     }
2857 
2858 
2859     // -- Parsing --
2860 
2861     // For convenience we wrap the input URI string in a new instance of the
2862     // following internal class.  This saves always having to pass the input
2863     // string as an argument to each internal scan/parse method.
2864 
2865     private class Parser {
2866 
2867         private String input;           // URI input string
2868         private boolean requireServerAuthority = false;
2869 
2870         Parser(String s) {
2871             input = s;
2872             string = s;
2873         }
2874 
2875         // -- Methods for throwing URISyntaxException in various ways --
2876 
2877         private void fail(String reason) throws URISyntaxException {
2878             throw new URISyntaxException(input, reason);
2879         }
2880 
2881         private void fail(String reason, int p) throws URISyntaxException {
2882             throw new URISyntaxException(input, reason, p);
2883         }
2884 
2885         private void failExpecting(String expected, int p)
2886             throws URISyntaxException
2887         {
2888             fail("Expected " + expected, p);
2889         }
2890 
2891 
2892         // -- Simple access to the input string --
2893 
2894         // Tells whether start < end and, if so, whether charAt(start) == c
2895         //
2896         private boolean at(int start, int end, char c) {
2897             return (start < end) && (input.charAt(start) == c);
2898         }
2899 
2900         // Tells whether start + s.length() < end and, if so,
2901         // whether the chars at the start position match s exactly
2902         //
2903         private boolean at(int start, int end, String s) {
2904             int p = start;
2905             int sn = s.length();
2906             if (sn > end - p)
2907                 return false;
2908             int i = 0;
2909             while (i < sn) {
2910                 if (input.charAt(p++) != s.charAt(i)) {
2911                     break;
2912                 }
2913                 i++;
2914             }
2915             return (i == sn);
2916         }
2917 
2918 
2919         // -- Scanning --
2920 
2921         // The various scan and parse methods that follow use a uniform
2922         // convention of taking the current start position and end index as
2923         // their first two arguments.  The start is inclusive while the end is
        // exclusive, just as in the String class, i.e., a start/end pair
        // denotes the half-open interval [start, end) of the input string.
2926         //
2927         // These methods never proceed past the end position.  They may return
2928         // -1 to indicate outright failure, but more often they simply return
2929         // the position of the first char after the last char scanned.  Thus
2930         // a typical idiom is
2931         //
2932         //     int p = start;
2933         //     int q = scan(p, end, ...);
2934         //     if (q > p)
2935         //         // We scanned something
2936         //         ...;
2937         //     else if (q == p)
2938         //         // We scanned nothing
2939         //         ...;
2940         //     else if (q == -1)
2941         //         // Something went wrong
2942         //         ...;
2943 
2944 
2945         // Scan a specific char: If the char at the given start position is
2946         // equal to c, return the index of the next char; otherwise, return the
2947         // start position.
2948         //
2949         private int scan(int start, int end, char c) {
2950             if ((start < end) && (input.charAt(start) == c))
2951                 return start + 1;
2952             return start;
2953         }
2954 
        // Scan forward from the given start position.  Stop at the first char
        // in the err string (in which case -1 is returned), or the first char
        // in the stop string (in which case the index of that char is
        // returned), or the end position (in which case the end position is
        // returned).  May return the start position if the first char is in
        // the stop string or the interval is empty.
2961         //
2962         private int scan(int start, int end, String err, String stop) {
2963             int p = start;
2964             while (p < end) {
2965                 char c = input.charAt(p);
2966                 if (err.indexOf(c) >= 0)
2967                     return -1;
2968                 if (stop.indexOf(c) >= 0)
2969                     break;
2970                 p++;
2971             }
2972             return p;
2973         }
2974 
        // Scan forward from the given start position.  Stop at the first char
        // in the stop string (in which case the index of that char is
        // returned), or the end position (in which case the end position is
        // returned).  May return the start position if the first char is in
        // the stop string or the interval is empty.
2980         //
2981         private int scan(int start, int end, String stop) {
2982             int p = start;
2983             while (p < end) {
2984                 char c = input.charAt(p);
2985                 if (stop.indexOf(c) >= 0)
2986                     break;
2987                 p++;
2988             }
2989             return p;
2990         }
2991 
2992         // Scan a potential escape sequence, starting at the given position,
2993         // with the given first char (i.e., charAt(start) == c).
2994         //
2995         // This method assumes that if escapes are allowed then visible
2996         // non-US-ASCII chars are also allowed.
2997         //
2998         private int scanEscape(int start, int n, char first)
2999             throws URISyntaxException
3000         {
3001             int p = start;
3002             char c = first;
3003             if (c == '%') {
3004                 // Process escape pair
3005                 if ((p + 3 <= n)
3006                     && match(input.charAt(p + 1), L_HEX, H_HEX)
3007                     && match(input.charAt(p + 2), L_HEX, H_HEX)) {
3008                     return p + 3;
3009                 }
3010                 fail("Malformed escape pair", p);
3011             } else if ((c > 128)
3012                        && !Character.isSpaceChar(c)
3013                        && !Character.isISOControl(c)) {
3014                 // Allow unescaped but visible non-US-ASCII chars
3015                 return p + 1;
3016             }
3017             return p;
3018         }
3019 
3020         // Scan chars that match the given mask pair
3021         //
3022         private int scan(int start, int n, long lowMask, long highMask)
3023             throws URISyntaxException
3024         {
3025             int p = start;
3026             while (p < n) {
3027                 char c = input.charAt(p);
3028                 if (match(c, lowMask, highMask)) {
3029                     p++;
3030                     continue;
3031                 }
3032                 if ((lowMask & L_ESCAPED) != 0) {
3033                     int q = scanEscape(p, n, c);
3034                     if (q > p) {
3035                         p = q;
3036                         continue;
3037                     }
3038                 }
3039                 break;
3040             }
3041             return p;
3042         }
3043 
3044         // Check that each of the chars in [start, end) matches the given mask
3045         //
3046         private void checkChars(int start, int end,
3047                                 long lowMask, long highMask,
3048                                 String what)
3049             throws URISyntaxException
3050         {
3051             int p = scan(start, end, lowMask, highMask);
3052             if (p < end)
3053                 fail("Illegal character in " + what, p);
3054         }
3055 
3056         // Check that the char at position p matches the given mask
3057         //
3058         private void checkChar(int p,
3059                                long lowMask, long highMask,
3060                                String what)
3061             throws URISyntaxException
3062         {
3063             checkChars(p, p + 1, lowMask, highMask, what);
3064         }
3065 
3066 
3067         // -- Parsing --
3068 
3069         // [<scheme>:]<scheme-specific-part>[#<fragment>]
3070         //
3071         void parse(boolean rsa) throws URISyntaxException {
3072             requireServerAuthority = rsa;
3073             int n = input.length();
3074             int p = scan(0, n, "/?#", ":");
3075             if ((p >= 0) && at(p, n, ':')) {
3076                 if (p == 0)
3077                     failExpecting("scheme name", 0);
3078                 checkChar(0, L_ALPHA, H_ALPHA, "scheme name");
3079                 checkChars(1, p, L_SCHEME, H_SCHEME, "scheme name");
3080                 scheme = input.substring(0, p);
3081                 p++;                    // Skip ':'
3082                 if (at(p, n, '/')) {
3083                     p = parseHierarchical(p, n);
3084                 } else {
3085                     // opaque; need to create the schemeSpecificPart
3086                     int q = scan(p, n, "#");
3087                     if (q <= p)
3088                         failExpecting("scheme-specific part", p);
3089                     checkChars(p, q, L_URIC, H_URIC, "opaque part");
3090                     schemeSpecificPart = input.substring(p, q);
3091                     p = q;
3092                 }
3093             } else {
3094                 p = parseHierarchical(0, n);
3095             }
3096             if (at(p, n, '#')) {
3097                 checkChars(p + 1, n, L_URIC, H_URIC, "fragment");
3098                 fragment = input.substring(p + 1, n);
3099                 p = n;
3100             }
3101             if (p < n)
3102                 fail("end of URI", p);
3103         }
3104 
3105         // [//authority]<path>[?<query>]
3106         //
3107         // DEVIATION from RFC2396: We allow an empty authority component as
3108         // long as it's followed by a non-empty path, query component, or
3109         // fragment component.  This is so that URIs such as "file:///foo/bar"
3110         // will parse.  This seems to be the intent of RFC2396, though the
3111         // grammar does not permit it.  If the authority is empty then the
3112         // userInfo, host, and port components are undefined.
3113         //
3114         // DEVIATION from RFC2396: We allow empty relative paths.  This seems
3115         // to be the intent of RFC2396, but the grammar does not permit it.
3116         // The primary consequence of this deviation is that "#f" parses as a
3117         // relative URI with an empty path.
3118         //
3119         private int parseHierarchical(int start, int n)
3120             throws URISyntaxException
3121         {
3122             int p = start;
3123             if (at(p, n, '/') && at(p + 1, n, '/')) {
3124                 p += 2;
3125                 int q = scan(p, n, "/?#");
3126                 if (q > p) {
3127                     p = parseAuthority(p, q);
3128                 } else if (q < n) {
3129                     // DEVIATION: Allow empty authority prior to non-empty
3130                     // path, query component or fragment identifier
3131                 } else
3132                     failExpecting("authority", p);
3133             }
3134             int q = scan(p, n, "?#"); // DEVIATION: May be empty
3135             checkChars(p, q, L_PATH, H_PATH, "path");
3136             path = input.substring(p, q);
3137             p = q;
3138             if (at(p, n, '?')) {
3139                 p++;
3140                 q = scan(p, n, "#");
3141                 checkChars(p, q, L_URIC, H_URIC, "query");
3142                 query = input.substring(p, q);
3143                 p = q;
3144             }
3145             return p;
3146         }
3147 
3148         // authority     = server | reg_name
3149         //
3150         // Ambiguity: An authority that is a registry name rather than a server
3151         // might have a prefix that parses as a server.  We use the fact that
3152         // the authority component is always followed by '/' or the end of the
3153         // input string to resolve this: If the complete authority did not
3154         // parse as a server then we try to parse it as a registry name.
3155         //
3156         private int parseAuthority(int start, int n)
3157             throws URISyntaxException
3158         {
3159             int p = start;
3160             int q = p;
3161             URISyntaxException ex = null;
3162 
3163             boolean serverChars;
3164             boolean regChars;
3165 
3166             if (scan(p, n, "]") > p) {
3167                 // contains a literal IPv6 address, therefore % is allowed
3168                 serverChars = (scan(p, n, L_SERVER_PERCENT, H_SERVER_PERCENT) == n);
3169             } else {
3170                 serverChars = (scan(p, n, L_SERVER, H_SERVER) == n);
3171             }
3172             regChars = (scan(p, n, L_REG_NAME, H_REG_NAME) == n);
3173 
3174             if (regChars && !serverChars) {
3175                 // Must be a registry-based authority
3176                 authority = input.substring(p, n);
3177                 return n;
3178             }
3179 
3180             if (serverChars) {
3181                 // Might be (probably is) a server-based authority, so attempt
3182                 // to parse it as such.  If the attempt fails, try to treat it
3183                 // as a registry-based authority.
3184                 try {
3185                     q = parseServer(p, n);
3186                     if (q < n)
3187                         failExpecting("end of authority", q);
3188                     authority = input.substring(p, n);
3189                 } catch (URISyntaxException x) {
3190                     // Undo results of failed parse
3191                     userInfo = null;
3192                     host = null;
3193                     port = -1;
3194                     if (requireServerAuthority) {
3195                         // If we're insisting upon a server-based authority,
3196                         // then just re-throw the exception
3197                         throw x;
3198                     } else {
3199                         // Save the exception in case it doesn't parse as a
3200                         // registry either
3201                         ex = x;
3202                         q = p;
3203                     }
3204                 }
3205             }
3206 
3207             if (q < n) {
3208                 if (regChars) {
3209                     // Registry-based authority
3210                     authority = input.substring(p, n);
3211                 } else if (ex != null) {
3212                     // Re-throw exception; it was probably due to
3213                     // a malformed IPv6 address
3214                     throw ex;
3215                 } else {
3216                     fail("Illegal character in authority", q);
3217                 }
3218             }
3219 
3220             return n;
3221         }
3222 
3223 
3224         // [<userinfo>@]<host>[:<port>]
3225         //
3226         private int parseServer(int start, int n)
3227             throws URISyntaxException
3228         {
3229             int p = start;
3230             int q;
3231 
3232             // userinfo
3233             q = scan(p, n, "/?#", "@");
3234             if ((q >= p) && at(q, n, '@')) {
3235                 checkChars(p, q, L_USERINFO, H_USERINFO, "user info");
3236                 userInfo = input.substring(p, q);
3237                 p = q + 1;              // Skip '@'
3238             }
3239 
3240             // hostname, IPv4 address, or IPv6 address
3241             if (at(p, n, '[')) {
3242                 // DEVIATION from RFC2396: Support IPv6 addresses, per RFC2732
3243                 p++;
3244                 q = scan(p, n, "/?#", "]");
3245                 if ((q > p) && at(q, n, ']')) {
3246                     // look for a "%" scope id
3247                     int r = scan (p, q, "%");
3248                     if (r > p) {
3249                         parseIPv6Reference(p, r);
3250                         if (r+1 == q) {
3251                             fail ("scope id expected");
3252                         }
3253                         checkChars (r+1, q, L_SCOPE_ID, H_SCOPE_ID,
3254                                                 "scope id");
3255                     } else {
3256                         parseIPv6Reference(p, q);
3257                     }
3258                     host = input.substring(p-1, q+1);
3259                     p = q + 1;
3260                 } else {
3261                     failExpecting("closing bracket for IPv6 address", q);
3262                 }
3263             } else {
3264                 q = parseIPv4Address(p, n);
3265                 if (q <= p)
3266                     q = parseHostname(p, n);
3267                 p = q;
3268             }
3269 
3270             // port
3271             if (at(p, n, ':')) {
3272                 p++;
3273                 q = scan(p, n, "/");
3274                 if (q > p) {
3275                     checkChars(p, q, L_DIGIT, H_DIGIT, "port number");
3276                     try {
3277                         port = Integer.parseInt(input, p, q, 10);
3278                     } catch (NumberFormatException x) {
3279                         fail("Malformed port number", p);
3280                     }
3281                     p = q;
3282                 }
3283             }
3284             if (p < n)
3285                 failExpecting("port number", p);
3286 
3287             return p;
3288         }
3289 
3290         // Scan a string of decimal digits whose value fits in a byte
3291         //
3292         private int scanByte(int start, int n)
3293             throws URISyntaxException
3294         {
3295             int p = start;
3296             int q = scan(p, n, L_DIGIT, H_DIGIT);
3297             if (q <= p) return q;
3298             if (Integer.parseInt(input, p, q, 10) > 255) return p;
3299             return q;
3300         }
3301 
3302         // Scan an IPv4 address.
3303         //
3304         // If the strict argument is true then we require that the given
3305         // interval contain nothing besides an IPv4 address; if it is false
3306         // then we only require that it start with an IPv4 address.
3307         //
        // If the interval does not contain or start with (depending upon the
        // strict argument) legal IPv4 address characters then we return -1
        // immediately; otherwise we insist that these characters parse as a
        // legal IPv4 address and throw an exception on failure.
3312         //
3313         // We assume that any string of decimal digits and dots must be an IPv4
3314         // address.  It won't parse as a hostname anyway, so making that
3315         // assumption here allows more meaningful exceptions to be thrown.
3316         //
3317         private int scanIPv4Address(int start, int n, boolean strict)
3318             throws URISyntaxException
3319         {
3320             int p = start;
3321             int q;
3322             int m = scan(p, n, L_DIGIT | L_DOT, H_DIGIT | H_DOT);
3323             if ((m <= p) || (strict && (m != n)))
3324                 return -1;
3325             for (;;) {
3326                 // Per RFC2732: At most three digits per byte
3327                 // Further constraint: Each element fits in a byte
3328                 if ((q = scanByte(p, m)) <= p) break;   p = q;
3329                 if ((q = scan(p, m, '.')) <= p) break;  p = q;
3330                 if ((q = scanByte(p, m)) <= p) break;   p = q;
3331                 if ((q = scan(p, m, '.')) <= p) break;  p = q;
3332                 if ((q = scanByte(p, m)) <= p) break;   p = q;
3333                 if ((q = scan(p, m, '.')) <= p) break;  p = q;
3334                 if ((q = scanByte(p, m)) <= p) break;   p = q;
3335                 if (q < m) break;
3336                 return q;
3337             }
3338             fail("Malformed IPv4 address", q);
3339             return -1;
3340         }
3341 
3342         // Take an IPv4 address: Throw an exception if the given interval
3343         // contains anything except an IPv4 address
3344         //
3345         private int takeIPv4Address(int start, int n, String expected)
3346             throws URISyntaxException
3347         {
3348             int p = scanIPv4Address(start, n, true);
3349             if (p <= start)
3350                 failExpecting(expected, start);
3351             return p;
3352         }
3353 
3354         // Attempt to parse an IPv4 address, returning -1 on failure but
3355         // allowing the given interval to contain [:<characters>] after
3356         // the IPv4 address.
3357         //
3358         private int parseIPv4Address(int start, int n) {
3359             int p;
3360 
3361             try {
3362                 p = scanIPv4Address(start, n, false);
3363             } catch (URISyntaxException x) {
3364                 return -1;
3365             } catch (NumberFormatException nfe) {
3366                 return -1;
3367             }
3368 
3369             if (p > start && p < n) {
3370                 // IPv4 address is followed by something - check that
3371                 // it's a ":" as this is the only valid character to
3372                 // follow an address.
3373                 if (input.charAt(p) != ':') {
3374                     p = -1;
3375                 }
3376             }
3377 
3378             if (p > start)
3379                 host = input.substring(start, p);
3380 
3381             return p;
3382         }
3383 
3384         // hostname      = domainlabel [ "." ] | 1*( domainlabel "." ) toplabel [ "." ]
3385         // domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
3386         // toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
3387         //
3388         private int parseHostname(int start, int n)
3389             throws URISyntaxException
3390         {
3391             int p = start;
3392             int q;
3393             int l = -1;                 // Start of last parsed label
3394 
3395             do {
3396                 // domainlabel = alphanum [ *( alphanum | "-" ) alphanum ]
3397                 q = scan(p, n, L_ALPHANUM, H_ALPHANUM);
3398                 if (q <= p)
3399                     break;
3400                 l = p;
3401                 if (q > p) {
3402                     p = q;
3403                     q = scan(p, n, L_ALPHANUM | L_DASH, H_ALPHANUM | H_DASH);
3404                     if (q > p) {
3405                         if (input.charAt(q - 1) == '-')
3406                             fail("Illegal character in hostname", q - 1);
3407                         p = q;
3408                     }
3409                 }
3410                 q = scan(p, n, '.');
3411                 if (q <= p)
3412                     break;
3413                 p = q;
3414             } while (p < n);
3415 
3416             if ((p < n) && !at(p, n, ':'))
3417                 fail("Illegal character in hostname", p);
3418 
3419             if (l < 0)
3420                 failExpecting("hostname", start);
3421 
3422             // for a fully qualified hostname check that the rightmost
3423             // label starts with an alpha character.
3424             if (l > start && !match(input.charAt(l), L_ALPHA, H_ALPHA)) {
3425                 fail("Illegal character in hostname", l);
3426             }
3427 
3428             host = input.substring(start, p);
3429             return p;
3430         }
3431 
3432 
3433         // IPv6 address parsing, from RFC2373: IPv6 Addressing Architecture
3434         //
3435         // Bug: The grammar in RFC2373 Appendix B does not allow addresses of
3436         // the form ::12.34.56.78, which are clearly shown in the examples
3437         // earlier in the document.  Here is the original grammar:
3438         //
3439         //   IPv6address = hexpart [ ":" IPv4address ]
3440         //   hexpart     = hexseq | hexseq "::" [ hexseq ] | "::" [ hexseq ]
3441         //   hexseq      = hex4 *( ":" hex4)
3442         //   hex4        = 1*4HEXDIG
3443         //
3444         // We therefore use the following revised grammar:
3445         //
3446         //   IPv6address = hexseq [ ":" IPv4address ]
3447         //                 | hexseq [ "::" [ hexpost ] ]
3448         //                 | "::" [ hexpost ]
3449         //   hexpost     = hexseq | hexseq ":" IPv4address | IPv4address
3450         //   hexseq      = hex4 *( ":" hex4)
3451         //   hex4        = 1*4HEXDIG
3452         //
3453         // This covers all and only the following cases:
3454         //
3455         //   hexseq
3456         //   hexseq : IPv4address
3457         //   hexseq ::
3458         //   hexseq :: hexseq
3459         //   hexseq :: hexseq : IPv4address
3460         //   hexseq :: IPv4address
3461         //   :: hexseq
3462         //   :: hexseq : IPv4address
3463         //   :: IPv4address
3464         //   ::
3465         //
3466         // Additionally we constrain the IPv6 address as follows :-
3467         //
3468         //  i.  IPv6 addresses without compressed zeros should contain
3469         //      exactly 16 bytes.
3470         //
3471         //  ii. IPv6 addresses with compressed zeros should contain
3472         //      less than 16 bytes.
3473 
3474         private int ipv6byteCount = 0;
3475 
3476         private int parseIPv6Reference(int start, int n)
3477             throws URISyntaxException
3478         {
3479             int p = start;
3480             int q;
3481             boolean compressedZeros = false;
3482 
3483             q = scanHexSeq(p, n);
3484 
3485             if (q > p) {
3486                 p = q;
3487                 if (at(p, n, "::")) {
3488                     compressedZeros = true;
3489                     p = scanHexPost(p + 2, n);
3490                 } else if (at(p, n, ':')) {
3491                     p = takeIPv4Address(p + 1,  n, "IPv4 address");
3492                     ipv6byteCount += 4;
3493                 }
3494             } else if (at(p, n, "::")) {
3495                 compressedZeros = true;
3496                 p = scanHexPost(p + 2, n);
3497             }
3498             if (p < n)
3499                 fail("Malformed IPv6 address", start);
3500             if (ipv6byteCount > 16)
3501                 fail("IPv6 address too long", start);
3502             if (!compressedZeros && ipv6byteCount < 16)
3503                 fail("IPv6 address too short", start);
3504             if (compressedZeros && ipv6byteCount == 16)
3505                 fail("Malformed IPv6 address", start);
3506 
3507             return p;
3508         }
3509 
3510         private int scanHexPost(int start, int n)
3511             throws URISyntaxException
3512         {
3513             int p = start;
3514             int q;
3515 
3516             if (p == n)
3517                 return p;
3518 
3519             q = scanHexSeq(p, n);
3520             if (q > p) {
3521                 p = q;
3522                 if (at(p, n, ':')) {
3523                     p++;
3524                     p = takeIPv4Address(p, n, "hex digits or IPv4 address");
3525                     ipv6byteCount += 4;
3526                 }
3527             } else {
3528                 p = takeIPv4Address(p, n, "hex digits or IPv4 address");
3529                 ipv6byteCount += 4;
3530             }
3531             return p;
3532         }
3533 
3534         // Scan a hex sequence; return -1 if one could not be scanned
3535         //
3536         private int scanHexSeq(int start, int n)
3537             throws URISyntaxException
3538         {
3539             int p = start;
3540             int q;
3541 
3542             q = scan(p, n, L_HEX, H_HEX);
3543             if (q <= p)
3544                 return -1;
3545             if (at(q, n, '.'))          // Beginning of IPv4 address
3546                 return -1;
3547             if (q > p + 4)
3548                 fail("IPv6 hexadecimal digit sequence too long", p);
3549             ipv6byteCount += 2;
3550             p = q;
3551             while (p < n) {
3552                 if (!at(p, n, ':'))
3553                     break;
3554                 if (at(p + 1, n, ':'))
3555                     break;              // "::"
3556                 p++;
3557                 q = scan(p, n, L_HEX, H_HEX);
3558                 if (q <= p)
3559                     failExpecting("digits for an IPv6 address", p);
3560                 if (at(q, n, '.')) {    // Beginning of IPv4 address
3561                     p--;
3562                     break;
3563                 }
3564                 if (q > p + 4)
3565                     fail("IPv6 hexadecimal digit sequence too long", p);
3566                 ipv6byteCount += 2;
3567                 p = q;
3568             }
3569 
3570             return p;
3571         }
3572 
3573     }
3574 
3575 }