1 /* 2 * reserved comment block 3 * DO NOT REMOVE OR ALTER! 4 */ 5 /* 6 * Licensed to the Apache Software Foundation (ASF) under one or more 7 * contributor license agreements. See the NOTICE file distributed with 8 * this work for additional information regarding copyright ownership. 9 * The ASF licenses this file to You under the Apache License, Version 2.0 10 * (the "License"); you may not use this file except in compliance with 11 * the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, software 16 * distributed under the License is distributed on an "AS IS" BASIS, 17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 * See the License for the specific language governing permissions and 19 * limitations under the License. 20 */ 21 22 package com.sun.org.apache.xerces.internal.util; 23 24 import java.io.IOException; 25 import java.io.Serializable; 26 import java.util.Objects; 27 28 /********************************************************************** 29 * A class to represent a Uniform Resource Identifier (URI). This class 30 * is designed to handle the parsing of URIs and provide access to 31 * the various components (scheme, host, port, userinfo, path, query 32 * string and fragment) that may constitute a URI. 33 * <p> 34 * Parsing of a URI specification is done according to the URI 35 * syntax described in 36 * <a href="http://www.ietf.org/rfc/rfc2396.txt?number=2396">RFC 2396</a>, 37 * and amended by 38 * <a href="http://www.ietf.org/rfc/rfc2732.txt?number=2732">RFC 2732</a>. 39 * <p> 40 * Every absolute URI consists of a scheme, followed by a colon (':'), 41 * followed by a scheme-specific part. For URIs that follow the 42 * "generic URI" syntax, the scheme-specific part begins with two 43 * slashes ("//") and may be followed by an authority segment (comprised 44 * of user information, host, and port), path segment, query segment 45 * and fragment. Note that RFC 2396 no longer specifies the use of the 46 * parameters segment and excludes the "user:password" syntax as part of 47 * the authority segment. If "user:password" appears in a URI, the entire 48 * user/password string is stored as userinfo. 49 * <p> 50 * For URIs that do not follow the "generic URI" syntax (e.g. mailto), 51 * the entire scheme-specific part is treated as the "path" portion 52 * of the URI. 53 * <p> 54 * Note that, unlike the java.net.URL class, this class does not provide 55 * any built-in network access functionality nor does it provide any 56 * scheme-specific functionality (for example, it does not know a 57 * default port for a specific scheme). Rather, it only knows the 58 * grammar and basic set of operations that can be applied to a URI. 59 * 60 * 61 **********************************************************************/ 62 public class URI implements Serializable { 63 64 /******************************************************************* 65 * MalformedURIExceptions are thrown in the process of building a URI 66 * or setting fields on a URI when an operation would result in an 67 * invalid URI specification. 68 * 69 ********************************************************************/ 70 public static class MalformedURIException extends IOException { 71 72 /** Serialization version. */ 73 static final long serialVersionUID = -6695054834342951930L; 74 75 /****************************************************************** 76 * Constructs a <code>MalformedURIException</code> with no specified 77 * detail message. 78 ******************************************************************/ 79 public MalformedURIException() { 80 super(); 81 } 82 83 /***************************************************************** 84 * Constructs a <code>MalformedURIException</code> with the 85 * specified detail message. 86 * 87 * @param p_msg the detail message. 88 ******************************************************************/ 89 public MalformedURIException(String p_msg) { 90 super(p_msg); 91 } 92 } 93 94 /** Serialization version. */ 95 static final long serialVersionUID = 1601921774685357214L; 96 97 private static final byte [] fgLookupTable = new byte[128]; 98 99 /** 100 * Character Classes 101 */ 102 103 /** reserved characters ;/?:@&=+$,[] */ 104 //RFC 2732 added '[' and ']' as reserved characters 105 private static final int RESERVED_CHARACTERS = 0x01; 106 107 /** URI punctuation mark characters: -_.!~*'() - these, combined with 108 alphanumerics, constitute the "unreserved" characters */ 109 private static final int MARK_CHARACTERS = 0x02; 110 111 /** scheme can be composed of alphanumerics and these characters: +-. */ 112 private static final int SCHEME_CHARACTERS = 0x04; 113 114 /** userinfo can be composed of unreserved, escaped and these 115 characters: ;:&=+$, */ 116 private static final int USERINFO_CHARACTERS = 0x08; 117 118 /** ASCII letter characters */ 119 private static final int ASCII_ALPHA_CHARACTERS = 0x10; 120 121 /** ASCII digit characters */ 122 private static final int ASCII_DIGIT_CHARACTERS = 0x20; 123 124 /** ASCII hex characters */ 125 private static final int ASCII_HEX_CHARACTERS = 0x40; 126 127 /** Path characters */ 128 private static final int PATH_CHARACTERS = 0x80; 129 130 /** Mask for alpha-numeric characters */ 131 private static final int MASK_ALPHA_NUMERIC = ASCII_ALPHA_CHARACTERS | ASCII_DIGIT_CHARACTERS; 132 133 /** Mask for unreserved characters */ 134 private static final int MASK_UNRESERVED_MASK = MASK_ALPHA_NUMERIC | MARK_CHARACTERS; 135 136 /** Mask for URI allowable characters except for % */ 137 private static final int MASK_URI_CHARACTER = MASK_UNRESERVED_MASK | RESERVED_CHARACTERS; 138 139 /** Mask for scheme characters */ 140 private static final int MASK_SCHEME_CHARACTER = MASK_ALPHA_NUMERIC | SCHEME_CHARACTERS; 141 142 /** Mask for userinfo characters */ 143 private static final int MASK_USERINFO_CHARACTER = MASK_UNRESERVED_MASK | USERINFO_CHARACTERS; 144 145 /** Mask for path characters */ 146 private static final int MASK_PATH_CHARACTER = MASK_UNRESERVED_MASK | PATH_CHARACTERS; 147 148 static { 149 // Add ASCII Digits and ASCII Hex Numbers 150 for (int i = '0'; i <= '9'; ++i) { 151 fgLookupTable[i] |= ASCII_DIGIT_CHARACTERS | ASCII_HEX_CHARACTERS; 152 } 153 154 // Add ASCII Letters and ASCII Hex Numbers 155 for (int i = 'A'; i <= 'F'; ++i) { 156 fgLookupTable[i] |= ASCII_ALPHA_CHARACTERS | ASCII_HEX_CHARACTERS; 157 fgLookupTable[i+0x00000020] |= ASCII_ALPHA_CHARACTERS | ASCII_HEX_CHARACTERS; 158 } 159 160 // Add ASCII Letters 161 for (int i = 'G'; i <= 'Z'; ++i) { 162 fgLookupTable[i] |= ASCII_ALPHA_CHARACTERS; 163 fgLookupTable[i+0x00000020] |= ASCII_ALPHA_CHARACTERS; 164 } 165 166 // Add Reserved Characters 167 fgLookupTable[';'] |= RESERVED_CHARACTERS; 168 fgLookupTable['/'] |= RESERVED_CHARACTERS; 169 fgLookupTable['?'] |= RESERVED_CHARACTERS; 170 fgLookupTable[':'] |= RESERVED_CHARACTERS; 171 fgLookupTable['@'] |= RESERVED_CHARACTERS; 172 fgLookupTable['&'] |= RESERVED_CHARACTERS; 173 fgLookupTable['='] |= RESERVED_CHARACTERS; 174 fgLookupTable['+'] |= RESERVED_CHARACTERS; 175 fgLookupTable['$'] |= RESERVED_CHARACTERS; 176 fgLookupTable[','] |= RESERVED_CHARACTERS; 177 fgLookupTable['['] |= RESERVED_CHARACTERS; 178 fgLookupTable[']'] |= RESERVED_CHARACTERS; 179 180 // Add Mark Characters 181 fgLookupTable['-'] |= MARK_CHARACTERS; 182 fgLookupTable['_'] |= MARK_CHARACTERS; 183 fgLookupTable['.'] |= MARK_CHARACTERS; 184 fgLookupTable['!'] |= MARK_CHARACTERS; 185 fgLookupTable['~'] |= MARK_CHARACTERS; 186 fgLookupTable['*'] |= MARK_CHARACTERS; 187 fgLookupTable['\''] |= MARK_CHARACTERS; 188 fgLookupTable['('] |= MARK_CHARACTERS; 189 fgLookupTable[')'] |= MARK_CHARACTERS; 190 191 // Add Scheme Characters 192 fgLookupTable['+'] |= SCHEME_CHARACTERS; 193 fgLookupTable['-'] |= SCHEME_CHARACTERS; 194 fgLookupTable['.'] |= SCHEME_CHARACTERS; 195 196 // Add Userinfo Characters 197 fgLookupTable[';'] |= USERINFO_CHARACTERS; 198 fgLookupTable[':'] |= USERINFO_CHARACTERS; 199 fgLookupTable['&'] |= USERINFO_CHARACTERS; 200 fgLookupTable['='] |= USERINFO_CHARACTERS; 201 fgLookupTable['+'] |= USERINFO_CHARACTERS; 202 fgLookupTable['$'] |= USERINFO_CHARACTERS; 203 fgLookupTable[','] |= USERINFO_CHARACTERS; 204 205 // Add Path Characters 206 fgLookupTable[';'] |= PATH_CHARACTERS; 207 fgLookupTable['/'] |= PATH_CHARACTERS; 208 fgLookupTable[':'] |= PATH_CHARACTERS; 209 fgLookupTable['@'] |= PATH_CHARACTERS; 210 fgLookupTable['&'] |= PATH_CHARACTERS; 211 fgLookupTable['='] |= PATH_CHARACTERS; 212 fgLookupTable['+'] |= PATH_CHARACTERS; 213 fgLookupTable['$'] |= PATH_CHARACTERS; 214 fgLookupTable[','] |= PATH_CHARACTERS; 215 } 216 217 /** Stores the scheme (usually the protocol) for this URI. */ 218 private String m_scheme = null; 219 220 /** If specified, stores the userinfo for this URI; otherwise null */ 221 private String m_userinfo = null; 222 223 /** If specified, stores the host for this URI; otherwise null */ 224 private String m_host = null; 225 226 /** If specified, stores the port for this URI; otherwise -1 */ 227 private int m_port = -1; 228 229 /** If specified, stores the registry based authority for this URI; otherwise -1 */ 230 private String m_regAuthority = null; 231 232 /** If specified, stores the path for this URI; otherwise null */ 233 private String m_path = null; 234 235 /** If specified, stores the query string for this URI; otherwise 236 null. */ 237 private String m_queryString = null; 238 239 /** If specified, stores the fragment for this URI; otherwise null */ 240 private String m_fragment = null; 241 242 private static boolean DEBUG = false; 243 244 /** 245 * Construct a new and uninitialized URI. 246 */ 247 public URI() { 248 } 249 250 /** 251 * Construct a new URI from another URI. All fields for this URI are 252 * set equal to the fields of the URI passed in. 253 * 254 * @param p_other the URI to copy (cannot be null) 255 */ 256 public URI(URI p_other) { 257 initialize(p_other); 258 } 259 260 /** 261 * Construct a new URI from a URI specification string. If the 262 * specification follows the "generic URI" syntax, (two slashes 263 * following the first colon), the specification will be parsed 264 * accordingly - setting the scheme, userinfo, host,port, path, query 265 * string and fragment fields as necessary. If the specification does 266 * not follow the "generic URI" syntax, the specification is parsed 267 * into a scheme and scheme-specific part (stored as the path) only. 268 * 269 * @param p_uriSpec the URI specification string (cannot be null or 270 * empty) 271 * 272 * @exception MalformedURIException if p_uriSpec violates any syntax 273 * rules 274 */ 275 public URI(String p_uriSpec) throws MalformedURIException { 276 this((URI)null, p_uriSpec); 277 } 278 279 /** 280 * Construct a new URI from a URI specification string. If the 281 * specification follows the "generic URI" syntax, (two slashes 282 * following the first colon), the specification will be parsed 283 * accordingly - setting the scheme, userinfo, host,port, path, query 284 * string and fragment fields as necessary. If the specification does 285 * not follow the "generic URI" syntax, the specification is parsed 286 * into a scheme and scheme-specific part (stored as the path) only. 287 * Construct a relative URI if boolean is assigned to "true" 288 * and p_uriSpec is not valid absolute URI, instead of throwing an exception. 289 * 290 * @param p_uriSpec the URI specification string (cannot be null or 291 * empty) 292 * @param allowNonAbsoluteURI true to permit non-absolute URIs, 293 * false otherwise. 294 * 295 * @exception MalformedURIException if p_uriSpec violates any syntax 296 * rules 297 */ 298 public URI(String p_uriSpec, boolean allowNonAbsoluteURI) throws MalformedURIException { 299 this((URI)null, p_uriSpec, allowNonAbsoluteURI); 300 } 301 302 /** 303 * Construct a new URI from a base URI and a URI specification string. 304 * The URI specification string may be a relative URI. 305 * 306 * @param p_base the base URI (cannot be null if p_uriSpec is null or 307 * empty) 308 * @param p_uriSpec the URI specification string (cannot be null or 309 * empty if p_base is null) 310 * 311 * @exception MalformedURIException if p_uriSpec violates any syntax 312 * rules 313 */ 314 public URI(URI p_base, String p_uriSpec) throws MalformedURIException { 315 initialize(p_base, p_uriSpec); 316 } 317 318 /** 319 * Construct a new URI from a base URI and a URI specification string. 320 * The URI specification string may be a relative URI. 321 * Construct a relative URI if boolean is assigned to "true" 322 * and p_uriSpec is not valid absolute URI and p_base is null 323 * instead of throwing an exception. 324 * 325 * @param p_base the base URI (cannot be null if p_uriSpec is null or 326 * empty) 327 * @param p_uriSpec the URI specification string (cannot be null or 328 * empty if p_base is null) 329 * @param allowNonAbsoluteURI true to permit non-absolute URIs, 330 * false otherwise. 331 * 332 * @exception MalformedURIException if p_uriSpec violates any syntax 333 * rules 334 */ 335 public URI(URI p_base, String p_uriSpec, boolean allowNonAbsoluteURI) throws MalformedURIException { 336 initialize(p_base, p_uriSpec, allowNonAbsoluteURI); 337 } 338 339 /** 340 * Construct a new URI that does not follow the generic URI syntax. 341 * Only the scheme and scheme-specific part (stored as the path) are 342 * initialized. 343 * 344 * @param p_scheme the URI scheme (cannot be null or empty) 345 * @param p_schemeSpecificPart the scheme-specific part (cannot be 346 * null or empty) 347 * 348 * @exception MalformedURIException if p_scheme violates any 349 * syntax rules 350 */ 351 public URI(String p_scheme, String p_schemeSpecificPart) 352 throws MalformedURIException { 353 if (p_scheme == null || p_scheme.trim().length() == 0) { 354 throw new MalformedURIException( 355 "Cannot construct URI with null/empty scheme!"); 356 } 357 if (p_schemeSpecificPart == null || 358 p_schemeSpecificPart.trim().length() == 0) { 359 throw new MalformedURIException( 360 "Cannot construct URI with null/empty scheme-specific part!"); 361 } 362 setScheme(p_scheme); 363 setPath(p_schemeSpecificPart); 364 } 365 366 /** 367 * Construct a new URI that follows the generic URI syntax from its 368 * component parts. Each component is validated for syntax and some 369 * basic semantic checks are performed as well. See the individual 370 * setter methods for specifics. 371 * 372 * @param p_scheme the URI scheme (cannot be null or empty) 373 * @param p_host the hostname, IPv4 address or IPv6 reference for the URI 374 * @param p_path the URI path - if the path contains '?' or '#', 375 * then the query string and/or fragment will be 376 * set from the path; however, if the query and 377 * fragment are specified both in the path and as 378 * separate parameters, an exception is thrown 379 * @param p_queryString the URI query string (cannot be specified 380 * if path is null) 381 * @param p_fragment the URI fragment (cannot be specified if path 382 * is null) 383 * 384 * @exception MalformedURIException if any of the parameters violates 385 * syntax rules or semantic rules 386 */ 387 public URI(String p_scheme, String p_host, String p_path, 388 String p_queryString, String p_fragment) 389 throws MalformedURIException { 390 this(p_scheme, null, p_host, -1, p_path, p_queryString, p_fragment); 391 } 392 393 /** 394 * Construct a new URI that follows the generic URI syntax from its 395 * component parts. Each component is validated for syntax and some 396 * basic semantic checks are performed as well. See the individual 397 * setter methods for specifics. 398 * 399 * @param p_scheme the URI scheme (cannot be null or empty) 400 * @param p_userinfo the URI userinfo (cannot be specified if host 401 * is null) 402 * @param p_host the hostname, IPv4 address or IPv6 reference for the URI 403 * @param p_port the URI port (may be -1 for "unspecified"; cannot 404 * be specified if host is null) 405 * @param p_path the URI path - if the path contains '?' or '#', 406 * then the query string and/or fragment will be 407 * set from the path; however, if the query and 408 * fragment are specified both in the path and as 409 * separate parameters, an exception is thrown 410 * @param p_queryString the URI query string (cannot be specified 411 * if path is null) 412 * @param p_fragment the URI fragment (cannot be specified if path 413 * is null) 414 * 415 * @exception MalformedURIException if any of the parameters violates 416 * syntax rules or semantic rules 417 */ 418 public URI(String p_scheme, String p_userinfo, 419 String p_host, int p_port, String p_path, 420 String p_queryString, String p_fragment) 421 throws MalformedURIException { 422 if (p_scheme == null || p_scheme.trim().length() == 0) { 423 throw new MalformedURIException("Scheme is required!"); 424 } 425 426 if (p_host == null) { 427 if (p_userinfo != null) { 428 throw new MalformedURIException( 429 "Userinfo may not be specified if host is not specified!"); 430 } 431 if (p_port != -1) { 432 throw new MalformedURIException( 433 "Port may not be specified if host is not specified!"); 434 } 435 } 436 437 if (p_path != null) { 438 if (p_path.indexOf('?') != -1 && p_queryString != null) { 439 throw new MalformedURIException( 440 "Query string cannot be specified in path and query string!"); 441 } 442 443 if (p_path.indexOf('#') != -1 && p_fragment != null) { 444 throw new MalformedURIException( 445 "Fragment cannot be specified in both the path and fragment!"); 446 } 447 } 448 449 setScheme(p_scheme); 450 setHost(p_host); 451 setPort(p_port); 452 setUserinfo(p_userinfo); 453 setPath(p_path); 454 setQueryString(p_queryString); 455 setFragment(p_fragment); 456 } 457 458 /** 459 * Initialize all fields of this URI from another URI. 460 * 461 * @param p_other the URI to copy (cannot be null) 462 */ 463 private void initialize(URI p_other) { 464 m_scheme = p_other.getScheme(); 465 m_userinfo = p_other.getUserinfo(); 466 m_host = p_other.getHost(); 467 m_port = p_other.getPort(); 468 m_regAuthority = p_other.getRegBasedAuthority(); 469 m_path = p_other.getPath(); 470 m_queryString = p_other.getQueryString(); 471 m_fragment = p_other.getFragment(); 472 } 473 474 /** 475 * Initializes this URI from a base URI and a URI specification string. 476 * See RFC 2396 Section 4 and Appendix B for specifications on parsing 477 * the URI and Section 5 for specifications on resolving relative URIs 478 * and relative paths. 479 * 480 * @param p_base the base URI (may be null if p_uriSpec is an absolute 481 * URI) 482 * @param p_uriSpec the URI spec string which may be an absolute or 483 * relative URI (can only be null/empty if p_base 484 * is not null) 485 * @param allowNonAbsoluteURI true to permit non-absolute URIs, 486 * in case of relative URI, false otherwise. 487 * 488 * @exception MalformedURIException if p_base is null and p_uriSpec 489 * is not an absolute URI or if 490 * p_uriSpec violates syntax rules 491 */ 492 private void initialize(URI p_base, String p_uriSpec, boolean allowNonAbsoluteURI) 493 throws MalformedURIException { 494 495 String uriSpec = p_uriSpec; 496 int uriSpecLen = (uriSpec != null) ? uriSpec.length() : 0; 497 498 if (p_base == null && uriSpecLen == 0) { 499 if (allowNonAbsoluteURI) { 500 m_path = ""; 501 return; 502 } 503 throw new MalformedURIException("Cannot initialize URI with empty parameters."); 504 } 505 506 // just make a copy of the base if spec is empty 507 if (uriSpecLen == 0) { 508 initialize(p_base); 509 return; 510 } 511 512 int index = 0; 513 514 // Check for scheme, which must be before '/', '?' or '#'. 515 int colonIdx = uriSpec.indexOf(':'); 516 if (colonIdx != -1) { 517 final int searchFrom = colonIdx - 1; 518 // search backwards starting from character before ':'. 519 int slashIdx = uriSpec.lastIndexOf('/', searchFrom); 520 int queryIdx = uriSpec.lastIndexOf('?', searchFrom); 521 int fragmentIdx = uriSpec.lastIndexOf('#', searchFrom); 522 523 if (colonIdx == 0 || slashIdx != -1 || 524 queryIdx != -1 || fragmentIdx != -1) { 525 // A standalone base is a valid URI according to spec 526 if (colonIdx == 0 || (p_base == null && fragmentIdx != 0 && !allowNonAbsoluteURI)) { 527 throw new MalformedURIException("No scheme found in URI."); 528 } 529 } 530 else { 531 initializeScheme(uriSpec); 532 index = m_scheme.length()+1; 533 534 // Neither 'scheme:' or 'scheme:#fragment' are valid URIs. 535 if (colonIdx == uriSpecLen - 1 || uriSpec.charAt(colonIdx+1) == '#') { 536 throw new MalformedURIException("Scheme specific part cannot be empty."); 537 } 538 } 539 } 540 else if (p_base == null && uriSpec.indexOf('#') != 0 && !allowNonAbsoluteURI) { 541 throw new MalformedURIException("No scheme found in URI."); 542 } 543 544 // Two slashes means we may have authority, but definitely means we're either 545 // matching net_path or abs_path. These two productions are ambiguous in that 546 // every net_path (except those containing an IPv6Reference) is an abs_path. 547 // RFC 2396 resolves this ambiguity by applying a greedy left most matching rule. 548 // Try matching net_path first, and if that fails we don't have authority so 549 // then attempt to match abs_path. 550 // 551 // net_path = "//" authority [ abs_path ] 552 // abs_path = "/" path_segments 553 if (((index+1) < uriSpecLen) && 554 (uriSpec.charAt(index) == '/' && uriSpec.charAt(index+1) == '/')) { 555 index += 2; 556 int startPos = index; 557 558 // Authority will be everything up to path, query or fragment 559 char testChar = '\0'; 560 while (index < uriSpecLen) { 561 testChar = uriSpec.charAt(index); 562 if (testChar == '/' || testChar == '?' || testChar == '#') { 563 break; 564 } 565 index++; 566 } 567 568 // Attempt to parse authority. If the section is an empty string 569 // this is a valid server based authority, so set the host to this 570 // value. 571 if (index > startPos) { 572 // If we didn't find authority we need to back up. Attempt to 573 // match against abs_path next. 574 if (!initializeAuthority(uriSpec.substring(startPos, index))) { 575 index = startPos - 2; 576 } 577 } 578 else { 579 m_host = ""; 580 } 581 } 582 583 initializePath(uriSpec, index); 584 585 // Resolve relative URI to base URI - see RFC 2396 Section 5.2 586 // In some cases, it might make more sense to throw an exception 587 // (when scheme is specified is the string spec and the base URI 588 // is also specified, for example), but we're just following the 589 // RFC specifications 590 if (p_base != null) { 591 absolutize(p_base); 592 } 593 } 594 595 /** 596 * Initializes this URI from a base URI and a URI specification string. 597 * See RFC 2396 Section 4 and Appendix B for specifications on parsing 598 * the URI and Section 5 for specifications on resolving relative URIs 599 * and relative paths. 600 * 601 * @param p_base the base URI (may be null if p_uriSpec is an absolute 602 * URI) 603 * @param p_uriSpec the URI spec string which may be an absolute or 604 * relative URI (can only be null/empty if p_base 605 * is not null) 606 * 607 * @exception MalformedURIException if p_base is null and p_uriSpec 608 * is not an absolute URI or if 609 * p_uriSpec violates syntax rules 610 */ 611 private void initialize(URI p_base, String p_uriSpec) 612 throws MalformedURIException { 613 614 String uriSpec = p_uriSpec; 615 int uriSpecLen = (uriSpec != null) ? uriSpec.length() : 0; 616 617 if (p_base == null && uriSpecLen == 0) { 618 throw new MalformedURIException( 619 "Cannot initialize URI with empty parameters."); 620 } 621 622 // just make a copy of the base if spec is empty 623 if (uriSpecLen == 0) { 624 initialize(p_base); 625 return; 626 } 627 628 int index = 0; 629 630 // Check for scheme, which must be before '/', '?' or '#'. 631 int colonIdx = uriSpec.indexOf(':'); 632 if (colonIdx != -1) { 633 final int searchFrom = colonIdx - 1; 634 // search backwards starting from character before ':'. 635 int slashIdx = uriSpec.lastIndexOf('/', searchFrom); 636 int queryIdx = uriSpec.lastIndexOf('?', searchFrom); 637 int fragmentIdx = uriSpec.lastIndexOf('#', searchFrom); 638 639 if (colonIdx == 0 || slashIdx != -1 || 640 queryIdx != -1 || fragmentIdx != -1) { 641 // A standalone base is a valid URI according to spec 642 if (colonIdx == 0 || (p_base == null && fragmentIdx != 0)) { 643 throw new MalformedURIException("No scheme found in URI."); 644 } 645 } 646 else { 647 initializeScheme(uriSpec); 648 index = m_scheme.length()+1; 649 650 // Neither 'scheme:' or 'scheme:#fragment' are valid URIs. 651 if (colonIdx == uriSpecLen - 1 || uriSpec.charAt(colonIdx+1) == '#') { 652 throw new MalformedURIException("Scheme specific part cannot be empty."); 653 } 654 } 655 } 656 else if (p_base == null && uriSpec.indexOf('#') != 0) { 657 throw new MalformedURIException("No scheme found in URI."); 658 } 659 660 // Two slashes means we may have authority, but definitely means we're either 661 // matching net_path or abs_path. These two productions are ambiguous in that 662 // every net_path (except those containing an IPv6Reference) is an abs_path. 663 // RFC 2396 resolves this ambiguity by applying a greedy left most matching rule. 664 // Try matching net_path first, and if that fails we don't have authority so 665 // then attempt to match abs_path. 666 // 667 // net_path = "//" authority [ abs_path ] 668 // abs_path = "/" path_segments 669 if (((index+1) < uriSpecLen) && 670 (uriSpec.charAt(index) == '/' && uriSpec.charAt(index+1) == '/')) { 671 index += 2; 672 int startPos = index; 673 674 // Authority will be everything up to path, query or fragment 675 char testChar = '\0'; 676 while (index < uriSpecLen) { 677 testChar = uriSpec.charAt(index); 678 if (testChar == '/' || testChar == '?' || testChar == '#') { 679 break; 680 } 681 index++; 682 } 683 684 // Attempt to parse authority. If the section is an empty string 685 // this is a valid server based authority, so set the host to this 686 // value. 687 if (index > startPos) { 688 // If we didn't find authority we need to back up. Attempt to 689 // match against abs_path next. 690 if (!initializeAuthority(uriSpec.substring(startPos, index))) { 691 index = startPos - 2; 692 } 693 } else if (index < uriSpecLen) { 694 //Same as java.net.URI: 695 // DEVIATION: Allow empty authority prior to non-empty 696 // path, query component or fragment identifier 697 m_host = ""; 698 } else { 699 throw new MalformedURIException("Expected authority."); 700 } 701 } 702 703 initializePath(uriSpec, index); 704 705 // Resolve relative URI to base URI - see RFC 2396 Section 5.2 706 // In some cases, it might make more sense to throw an exception 707 // (when scheme is specified is the string spec and the base URI 708 // is also specified, for example), but we're just following the 709 // RFC specifications 710 if (p_base != null) { 711 absolutize(p_base); 712 } 713 } 714 715 /** 716 * Absolutize URI with given base URI. 717 * 718 * @param p_base base URI for absolutization 719 */ 720 public void absolutize(URI p_base) { 721 722 // check to see if this is the current doc - RFC 2396 5.2 #2 723 // note that this is slightly different from the RFC spec in that 724 // we don't include the check for query string being null 725 // - this handles cases where the urispec is just a query 726 // string or a fragment (e.g. "?y" or "#s") - 727 // see <http://www.ics.uci.edu/~fielding/url/test1.html> which 728 // identified this as a bug in the RFC 729 if (m_path.length() == 0 && m_scheme == null && 730 m_host == null && m_regAuthority == null) { 731 m_scheme = p_base.getScheme(); 732 m_userinfo = p_base.getUserinfo(); 733 m_host = p_base.getHost(); 734 m_port = p_base.getPort(); 735 m_regAuthority = p_base.getRegBasedAuthority(); 736 m_path = p_base.getPath(); 737 738 if (m_queryString == null) { 739 m_queryString = p_base.getQueryString(); 740 741 if (m_fragment == null) { 742 m_fragment = p_base.getFragment(); 743 } 744 } 745 return; 746 } 747 748 // check for scheme - RFC 2396 5.2 #3 749 // if we found a scheme, it means absolute URI, so we're done 750 if (m_scheme == null) { 751 m_scheme = p_base.getScheme(); 752 } 753 else { 754 return; 755 } 756 757 // check for authority - RFC 2396 5.2 #4 758 // if we found a host, then we've got a network path, so we're done 759 if (m_host == null && m_regAuthority == null) { 760 m_userinfo = p_base.getUserinfo(); 761 m_host = p_base.getHost(); 762 m_port = p_base.getPort(); 763 m_regAuthority = p_base.getRegBasedAuthority(); 764 } 765 else { 766 return; 767 } 768 769 // check for absolute path - RFC 2396 5.2 #5 770 if (m_path.length() > 0 && 771 m_path.startsWith("/")) { 772 return; 773 } 774 775 // if we get to this point, we need to resolve relative path 776 // RFC 2396 5.2 #6 777 String path = ""; 778 String basePath = p_base.getPath(); 779 780 // 6a - get all but the last segment of the base URI path 781 if (basePath != null && basePath.length() > 0) { 782 int lastSlash = basePath.lastIndexOf('/'); 783 if (lastSlash != -1) { 784 path = basePath.substring(0, lastSlash+1); 785 } 786 } 787 else if (m_path.length() > 0) { 788 path = "/"; 789 } 790 791 // 6b - append the relative URI path 792 path = path.concat(m_path); 793 794 // 6c - remove all "./" where "." is a complete path segment 795 int index = -1; 796 while ((index = path.indexOf("/./")) != -1) { 797 path = path.substring(0, index+1).concat(path.substring(index+3)); 798 } 799 800 // 6d - remove "." if path ends with "." as a complete path segment 801 if (path.endsWith("/.")) { 802 path = path.substring(0, path.length()-1); 803 } 804 805 // 6e - remove all "<segment>/../" where "<segment>" is a complete 806 // path segment not equal to ".." 807 index = 1; 808 int segIndex = -1; 809 String tempString = null; 810 811 while ((index = path.indexOf("/../", index)) > 0) { 812 tempString = path.substring(0, path.indexOf("/../")); 813 segIndex = tempString.lastIndexOf('/'); 814 if (segIndex != -1) { 815 if (!tempString.substring(segIndex).equals("..")) { 816 path = path.substring(0, segIndex+1).concat(path.substring(index+4)); 817 index = segIndex; 818 } 819 else { 820 index += 4; 821 } 822 } 823 else { 824 index += 4; 825 } 826 } 827 828 // 6f - remove ending "<segment>/.." where "<segment>" is a 829 // complete path segment 830 if (path.endsWith("/..")) { 831 tempString = path.substring(0, path.length()-3); 832 segIndex = tempString.lastIndexOf('/'); 833 if (segIndex != -1) { 834 path = path.substring(0, segIndex+1); 835 } 836 } 837 m_path = path; 838 } 839 840 /** 841 * Initialize the scheme for this URI from a URI string spec. 842 * 843 * @param p_uriSpec the URI specification (cannot be null) 844 * 845 * @exception MalformedURIException if URI does not have a conformant 846 * scheme 847 */ 848 private void initializeScheme(String p_uriSpec) 849 throws MalformedURIException { 850 int uriSpecLen = p_uriSpec.length(); 851 int index = 0; 852 String scheme = null; 853 char testChar = '\0'; 854 855 while (index < uriSpecLen) { 856 testChar = p_uriSpec.charAt(index); 857 if (testChar == ':' || testChar == '/' || 858 testChar == '?' || testChar == '#') { 859 break; 860 } 861 index++; 862 } 863 scheme = p_uriSpec.substring(0, index); 864 865 if (scheme.length() == 0) { 866 throw new MalformedURIException("No scheme found in URI."); 867 } 868 else { 869 setScheme(scheme); 870 } 871 } 872 873 /** 874 * Initialize the authority (either server or registry based) 875 * for this URI from a URI string spec. 876 * 877 * @param p_uriSpec the URI specification (cannot be null) 878 * 879 * @return true if the given string matched server or registry 880 * based authority 881 */ 882 private boolean initializeAuthority(String p_uriSpec) { 883 884 int index = 0; 885 int start = 0; 886 int end = p_uriSpec.length(); 887 888 char testChar = '\0'; 889 String userinfo = null; 890 891 // userinfo is everything up to @ 892 if (p_uriSpec.indexOf('@', start) != -1) { 893 while (index < end) { 894 testChar = p_uriSpec.charAt(index); 895 if (testChar == '@') { 896 break; 897 } 898 index++; 899 } 900 userinfo = p_uriSpec.substring(start, index); 901 index++; 902 } 903 904 // host is everything up to last ':', or up to 905 // and including ']' if followed by ':'. 906 String host = null; 907 start = index; 908 boolean hasPort = false; 909 if (index < end) { 910 if (p_uriSpec.charAt(start) == '[') { 911 int bracketIndex = p_uriSpec.indexOf(']', start); 912 index = (bracketIndex != -1) ? bracketIndex : end; 913 if (index+1 < end && p_uriSpec.charAt(index+1) == ':') { 914 ++index; 915 hasPort = true; 916 } 917 else { 918 index = end; 919 } 920 } 921 else { 922 int colonIndex = p_uriSpec.lastIndexOf(':', end); 923 index = (colonIndex > start) ? colonIndex : end; 924 hasPort = (index != end); 925 } 926 } 927 host = p_uriSpec.substring(start, index); 928 int port = -1; 929 if (host.length() > 0) { 930 // port 931 if (hasPort) { 932 index++; 933 start = index; 934 while (index < end) { 935 index++; 936 } 937 String portStr = p_uriSpec.substring(start, index); 938 if (portStr.length() > 0) { 939 // REVISIT: Remove this code. 940 /** for (int i = 0; i < portStr.length(); i++) { 941 if (!isDigit(portStr.charAt(i))) { 942 throw new MalformedURIException( 943 portStr + 944 " is invalid. Port should only contain digits!"); 945 } 946 }**/ 947 // REVISIT: Remove this code. 948 // Store port value as string instead of integer. 949 try { 950 port = Integer.parseInt(portStr); 951 if (port == -1) --port; 952 } 953 catch (NumberFormatException nfe) { 954 port = -2; 955 } 956 } 957 } 958 } 959 960 if (isValidServerBasedAuthority(host, port, userinfo)) { 961 m_host = host; 962 m_port = port; 963 m_userinfo = userinfo; 964 return true; 965 } 966 // Note: Registry based authority is being removed from a 967 // new spec for URI which would obsolete RFC 2396. If the 968 // spec is added to XML errata, processing of reg_name 969 // needs to be removed. - mrglavas. 970 else if (isValidRegistryBasedAuthority(p_uriSpec)) { 971 m_regAuthority = p_uriSpec; 972 return true; 973 } 974 return false; 975 } 976 977 /** 978 * Determines whether the components host, port, and user info 979 * are valid as a server authority. 980 * 981 * @param host the host component of authority 982 * @param port the port number component of authority 983 * @param userinfo the user info component of authority 984 * 985 * @return true if the given host, port, and userinfo compose 986 * a valid server authority 987 */ 988 private boolean isValidServerBasedAuthority(String host, int port, String userinfo) { 989 990 // Check if the host is well formed. 991 if (!isWellFormedAddress(host)) { 992 return false; 993 } 994 995 // Check that port is well formed if it exists. 996 // REVISIT: There's no restriction on port value ranges, but 997 // perform the same check as in setPort to be consistent. Pass 998 // in a string to this method instead of an integer. 999 if (port < -1 || port > 65535) { 1000 return false; 1001 } 1002 1003 // Check that userinfo is well formed if it exists. 1004 if (userinfo != null) { 1005 // Userinfo can contain alphanumerics, mark characters, escaped 1006 // and ';',':','&','=','+','$',',' 1007 int index = 0; 1008 int end = userinfo.length(); 1009 char testChar = '\0'; 1010 while (index < end) { 1011 testChar = userinfo.charAt(index); 1012 if (testChar == '%') { 1013 if (index+2 >= end || 1014 !isHex(userinfo.charAt(index+1)) || 1015 !isHex(userinfo.charAt(index+2))) { 1016 return false; 1017 } 1018 index += 2; 1019 } 1020 else if (!isUserinfoCharacter(testChar)) { 1021 return false; 1022 } 1023 ++index; 1024 } 1025 } 1026 return true; 1027 } 1028 1029 /** 1030 * Determines whether the given string is a registry based authority. 1031 * 1032 * @param authority the authority component of a URI 1033 * 1034 * @return true if the given string is a registry based authority 1035 */ 1036 private boolean isValidRegistryBasedAuthority(String authority) { 1037 int index = 0; 1038 int end = authority.length(); 1039 char testChar; 1040 1041 while (index < end) { 1042 testChar = authority.charAt(index); 1043 1044 // check for valid escape sequence 1045 if (testChar == '%') { 1046 if (index+2 >= end || 1047 !isHex(authority.charAt(index+1)) || 1048 !isHex(authority.charAt(index+2))) { 1049 return false; 1050 } 1051 index += 2; 1052 } 1053 // can check against path characters because the set 1054 // is the same except for '/' which we've already excluded. 1055 else if (!isPathCharacter(testChar)) { 1056 return false; 1057 } 1058 ++index; 1059 } 1060 return true; 1061 } 1062 1063 /** 1064 * Initialize the path for this URI from a URI string spec. 1065 * 1066 * @param p_uriSpec the URI specification (cannot be null) 1067 * @param p_nStartIndex the index to begin scanning from 1068 * 1069 * @exception MalformedURIException if p_uriSpec violates syntax rules 1070 */ 1071 private void initializePath(String p_uriSpec, int p_nStartIndex) 1072 throws MalformedURIException { 1073 if (p_uriSpec == null) { 1074 throw new MalformedURIException( 1075 "Cannot initialize path from null string!"); 1076 } 1077 1078 int index = p_nStartIndex; 1079 int start = p_nStartIndex; 1080 int end = p_uriSpec.length(); 1081 char testChar = '\0'; 1082 1083 // path - everything up to query string or fragment 1084 if (start < end) { 1085 // RFC 2732 only allows '[' and ']' to appear in the opaque part. 1086 if (getScheme() == null || p_uriSpec.charAt(start) == '/') { 1087 1088 // Scan path. 1089 // abs_path = "/" path_segments 1090 // rel_path = rel_segment [ abs_path ] 1091 while (index < end) { 1092 testChar = p_uriSpec.charAt(index); 1093 1094 // check for valid escape sequence 1095 if (testChar == '%') { 1096 if (index+2 >= end || 1097 !isHex(p_uriSpec.charAt(index+1)) || 1098 !isHex(p_uriSpec.charAt(index+2))) { 1099 throw new MalformedURIException( 1100 "Path contains invalid escape sequence!"); 1101 } 1102 index += 2; 1103 } 1104 // Path segments cannot contain '[' or ']' since pchar 1105 // production was not changed by RFC 2732. 1106 else if (!isPathCharacter(testChar)) { 1107 if (testChar == '?' || testChar == '#') { 1108 break; 1109 } 1110 throw new MalformedURIException( 1111 "Path contains invalid character: " + testChar); 1112 } 1113 ++index; 1114 } 1115 } 1116 else { 1117 1118 // Scan opaque part. 1119 // opaque_part = uric_no_slash *uric 1120 while (index < end) { 1121 testChar = p_uriSpec.charAt(index); 1122 1123 if (testChar == '?' || testChar == '#') { 1124 break; 1125 } 1126 1127 // check for valid escape sequence 1128 if (testChar == '%') { 1129 if (index+2 >= end || 1130 !isHex(p_uriSpec.charAt(index+1)) || 1131 !isHex(p_uriSpec.charAt(index+2))) { 1132 throw new MalformedURIException( 1133 "Opaque part contains invalid escape sequence!"); 1134 } 1135 index += 2; 1136 } 1137 // If the scheme specific part is opaque, it can contain '[' 1138 // and ']'. uric_no_slash wasn't modified by RFC 2732, which 1139 // I've interpreted as an error in the spec, since the 1140 // production should be equivalent to (uric - '/'), and uric 1141 // contains '[' and ']'. - mrglavas 1142 else if (!isURICharacter(testChar)) { 1143 throw new MalformedURIException( 1144 "Opaque part contains invalid character: " + testChar); 1145 } 1146 ++index; 1147 } 1148 } 1149 } 1150 m_path = p_uriSpec.substring(start, index); 1151 1152 // query - starts with ? and up to fragment or end 1153 if (testChar == '?') { 1154 index++; 1155 start = index; 1156 while (index < end) { 1157 testChar = p_uriSpec.charAt(index); 1158 if (testChar == '#') { 1159 break; 1160 } 1161 if (testChar == '%') { 1162 if (index+2 >= end || 1163 !isHex(p_uriSpec.charAt(index+1)) || 1164 !isHex(p_uriSpec.charAt(index+2))) { 1165 throw new MalformedURIException( 1166 "Query string contains invalid escape sequence!"); 1167 } 1168 index += 2; 1169 } 1170 else if (!isURICharacter(testChar)) { 1171 throw new MalformedURIException( 1172 "Query string contains invalid character: " + testChar); 1173 } 1174 index++; 1175 } 1176 m_queryString = p_uriSpec.substring(start, index); 1177 } 1178 1179 // fragment - starts with # 1180 if (testChar == '#') { 1181 index++; 1182 start = index; 1183 while (index < end) { 1184 testChar = p_uriSpec.charAt(index); 1185 1186 if (testChar == '%') { 1187 if (index+2 >= end || 1188 !isHex(p_uriSpec.charAt(index+1)) || 1189 !isHex(p_uriSpec.charAt(index+2))) { 1190 throw new MalformedURIException( 1191 "Fragment contains invalid escape sequence!"); 1192 } 1193 index += 2; 1194 } 1195 else if (!isURICharacter(testChar)) { 1196 throw new MalformedURIException( 1197 "Fragment contains invalid character: "+testChar); 1198 } 1199 index++; 1200 } 1201 m_fragment = p_uriSpec.substring(start, index); 1202 } 1203 } 1204 1205 /** 1206 * Get the scheme for this URI. 1207 * 1208 * @return the scheme for this URI 1209 */ 1210 public String getScheme() { 1211 return m_scheme; 1212 } 1213 1214 /** 1215 * Get the scheme-specific part for this URI (everything following the 1216 * scheme and the first colon). See RFC 2396 Section 5.2 for spec. 1217 * 1218 * @return the scheme-specific part for this URI 1219 */ 1220 public String getSchemeSpecificPart() { 1221 final StringBuilder schemespec = new StringBuilder(); 1222 1223 if (m_host != null || m_regAuthority != null) { 1224 schemespec.append("//"); 1225 1226 // Server based authority. 1227 if (m_host != null) { 1228 1229 if (m_userinfo != null) { 1230 schemespec.append(m_userinfo); 1231 schemespec.append('@'); 1232 } 1233 1234 schemespec.append(m_host); 1235 1236 if (m_port != -1) { 1237 schemespec.append(':'); 1238 schemespec.append(m_port); 1239 } 1240 } 1241 // Registry based authority. 1242 else { 1243 schemespec.append(m_regAuthority); 1244 } 1245 } 1246 1247 if (m_path != null) { 1248 schemespec.append((m_path)); 1249 } 1250 1251 if (m_queryString != null) { 1252 schemespec.append('?'); 1253 schemespec.append(m_queryString); 1254 } 1255 1256 if (m_fragment != null) { 1257 schemespec.append('#'); 1258 schemespec.append(m_fragment); 1259 } 1260 1261 return schemespec.toString(); 1262 } 1263 1264 /** 1265 * Get the userinfo for this URI. 1266 * 1267 * @return the userinfo for this URI (null if not specified). 1268 */ 1269 public String getUserinfo() { 1270 return m_userinfo; 1271 } 1272 1273 /** 1274 * Get the host for this URI. 1275 * 1276 * @return the host for this URI (null if not specified). 1277 */ 1278 public String getHost() { 1279 return m_host; 1280 } 1281 1282 /** 1283 * Get the port for this URI. 1284 * 1285 * @return the port for this URI (-1 if not specified). 1286 */ 1287 public int getPort() { 1288 return m_port; 1289 } 1290 1291 /** 1292 * Get the registry based authority for this URI. 1293 * 1294 * @return the registry based authority (null if not specified). 1295 */ 1296 public String getRegBasedAuthority() { 1297 return m_regAuthority; 1298 } 1299 1300 /** 1301 * Get the authority for this URI. 1302 * 1303 * @return the authority 1304 */ 1305 public String getAuthority() { 1306 final StringBuilder authority = new StringBuilder(); 1307 if (m_host != null || m_regAuthority != null) { 1308 authority.append("//"); 1309 1310 // Server based authority. 1311 if (m_host != null) { 1312 1313 if (m_userinfo != null) { 1314 authority.append(m_userinfo); 1315 authority.append('@'); 1316 } 1317 1318 authority.append(m_host); 1319 1320 if (m_port != -1) { 1321 authority.append(':'); 1322 authority.append(m_port); 1323 } 1324 } 1325 // Registry based authority. 1326 else { 1327 authority.append(m_regAuthority); 1328 } 1329 } 1330 return authority.toString(); 1331 } 1332 1333 /** 1334 * Get the path for this URI (optionally with the query string and 1335 * fragment). 1336 * 1337 * @param p_includeQueryString if true (and query string is not null), 1338 * then a "?" followed by the query string 1339 * will be appended 1340 * @param p_includeFragment if true (and fragment is not null), 1341 * then a "#" followed by the fragment 1342 * will be appended 1343 * 1344 * @return the path for this URI possibly including the query string 1345 * and fragment 1346 */ 1347 public String getPath(boolean p_includeQueryString, 1348 boolean p_includeFragment) { 1349 final StringBuilder pathString = new StringBuilder(m_path); 1350 1351 if (p_includeQueryString && m_queryString != null) { 1352 pathString.append('?'); 1353 pathString.append(m_queryString); 1354 } 1355 1356 if (p_includeFragment && m_fragment != null) { 1357 pathString.append('#'); 1358 pathString.append(m_fragment); 1359 } 1360 return pathString.toString(); 1361 } 1362 1363 /** 1364 * Get the path for this URI. Note that the value returned is the path 1365 * only and does not include the query string or fragment. 1366 * 1367 * @return the path for this URI. 1368 */ 1369 public String getPath() { 1370 return m_path; 1371 } 1372 1373 /** 1374 * Get the query string for this URI. 1375 * 1376 * @return the query string for this URI. Null is returned if there 1377 * was no "?" in the URI spec, empty string if there was a 1378 * "?" but no query string following it. 1379 */ 1380 public String getQueryString() { 1381 return m_queryString; 1382 } 1383 1384 /** 1385 * Get the fragment for this URI. 1386 * 1387 * @return the fragment for this URI. Null is returned if there 1388 * was no "#" in the URI spec, empty string if there was a 1389 * "#" but no fragment following it. 1390 */ 1391 public String getFragment() { 1392 return m_fragment; 1393 } 1394 1395 /** 1396 * Set the scheme for this URI. The scheme is converted to lowercase 1397 * before it is set. 1398 * 1399 * @param p_scheme the scheme for this URI (cannot be null) 1400 * 1401 * @exception MalformedURIException if p_scheme is not a conformant 1402 * scheme name 1403 */ 1404 public void setScheme(String p_scheme) throws MalformedURIException { 1405 if (p_scheme == null) { 1406 throw new MalformedURIException( 1407 "Cannot set scheme from null string!"); 1408 } 1409 if (!isConformantSchemeName(p_scheme)) { 1410 throw new MalformedURIException("The scheme is not conformant."); 1411 } 1412 1413 m_scheme = p_scheme.toLowerCase(); 1414 } 1415 1416 /** 1417 * Set the userinfo for this URI. If a non-null value is passed in and 1418 * the host value is null, then an exception is thrown. 1419 * 1420 * @param p_userinfo the userinfo for this URI 1421 * 1422 * @exception MalformedURIException if p_userinfo contains invalid 1423 * characters 1424 */ 1425 public void setUserinfo(String p_userinfo) throws MalformedURIException { 1426 if (p_userinfo == null) { 1427 m_userinfo = null; 1428 return; 1429 } 1430 else { 1431 if (m_host == null) { 1432 throw new MalformedURIException( 1433 "Userinfo cannot be set when host is null!"); 1434 } 1435 1436 // userinfo can contain alphanumerics, mark characters, escaped 1437 // and ';',':','&','=','+','$',',' 1438 int index = 0; 1439 int end = p_userinfo.length(); 1440 char testChar = '\0'; 1441 while (index < end) { 1442 testChar = p_userinfo.charAt(index); 1443 if (testChar == '%') { 1444 if (index+2 >= end || 1445 !isHex(p_userinfo.charAt(index+1)) || 1446 !isHex(p_userinfo.charAt(index+2))) { 1447 throw new MalformedURIException( 1448 "Userinfo contains invalid escape sequence!"); 1449 } 1450 } 1451 else if (!isUserinfoCharacter(testChar)) { 1452 throw new MalformedURIException( 1453 "Userinfo contains invalid character:"+testChar); 1454 } 1455 index++; 1456 } 1457 } 1458 m_userinfo = p_userinfo; 1459 } 1460 1461 /** 1462 * <p>Set the host for this URI. If null is passed in, the userinfo 1463 * field is also set to null and the port is set to -1.</p> 1464 * 1465 * <p>Note: This method overwrites registry based authority if it 1466 * previously existed in this URI.</p> 1467 * 1468 * @param p_host the host for this URI 1469 * 1470 * @exception MalformedURIException if p_host is not a valid IP 1471 * address or DNS hostname. 1472 */ 1473 public void setHost(String p_host) throws MalformedURIException { 1474 if (p_host == null || p_host.length() == 0) { 1475 if (p_host != null) { 1476 m_regAuthority = null; 1477 } 1478 m_host = p_host; 1479 m_userinfo = null; 1480 m_port = -1; 1481 return; 1482 } 1483 else if (!isWellFormedAddress(p_host)) { 1484 throw new MalformedURIException("Host is not a well formed address!"); 1485 } 1486 m_host = p_host; 1487 m_regAuthority = null; 1488 } 1489 1490 /** 1491 * Set the port for this URI. -1 is used to indicate that the port is 1492 * not specified, otherwise valid port numbers are between 0 and 65535. 1493 * If a valid port number is passed in and the host field is null, 1494 * an exception is thrown. 1495 * 1496 * @param p_port the port number for this URI 1497 * 1498 * @exception MalformedURIException if p_port is not -1 and not a 1499 * valid port number 1500 */ 1501 public void setPort(int p_port) throws MalformedURIException { 1502 if (p_port >= 0 && p_port <= 65535) { 1503 if (m_host == null) { 1504 throw new MalformedURIException( 1505 "Port cannot be set when host is null!"); 1506 } 1507 } 1508 else if (p_port != -1) { 1509 throw new MalformedURIException("Invalid port number!"); 1510 } 1511 m_port = p_port; 1512 } 1513 1514 /** 1515 * <p>Sets the registry based authority for this URI.</p> 1516 * 1517 * <p>Note: This method overwrites server based authority 1518 * if it previously existed in this URI.</p> 1519 * 1520 * @param authority the registry based authority for this URI 1521 * 1522 * @exception MalformedURIException it authority is not a 1523 * well formed registry based authority 1524 */ 1525 public void setRegBasedAuthority(String authority) 1526 throws MalformedURIException { 1527 1528 if (authority == null) { 1529 m_regAuthority = null; 1530 return; 1531 } 1532 // reg_name = 1*( unreserved | escaped | "$" | "," | 1533 // ";" | ":" | "@" | "&" | "=" | "+" ) 1534 else if (authority.length() < 1 || 1535 !isValidRegistryBasedAuthority(authority) || 1536 authority.indexOf('/') != -1) { 1537 throw new MalformedURIException("Registry based authority is not well formed."); 1538 } 1539 m_regAuthority = authority; 1540 m_host = null; 1541 m_userinfo = null; 1542 m_port = -1; 1543 } 1544 1545 /** 1546 * Set the path for this URI. If the supplied path is null, then the 1547 * query string and fragment are set to null as well. If the supplied 1548 * path includes a query string and/or fragment, these fields will be 1549 * parsed and set as well. Note that, for URIs following the "generic 1550 * URI" syntax, the path specified should start with a slash. 1551 * For URIs that do not follow the generic URI syntax, this method 1552 * sets the scheme-specific part. 1553 * 1554 * @param p_path the path for this URI (may be null) 1555 * 1556 * @exception MalformedURIException if p_path contains invalid 1557 * characters 1558 */ 1559 public void setPath(String p_path) throws MalformedURIException { 1560 if (p_path == null) { 1561 m_path = null; 1562 m_queryString = null; 1563 m_fragment = null; 1564 } 1565 else { 1566 initializePath(p_path, 0); 1567 } 1568 } 1569 1570 /** 1571 * Append to the end of the path of this URI. If the current path does 1572 * not end in a slash and the path to be appended does not begin with 1573 * a slash, a slash will be appended to the current path before the 1574 * new segment is added. Also, if the current path ends in a slash 1575 * and the new segment begins with a slash, the extra slash will be 1576 * removed before the new segment is appended. 1577 * 1578 * @param p_addToPath the new segment to be added to the current path 1579 * 1580 * @exception MalformedURIException if p_addToPath contains syntax 1581 * errors 1582 */ 1583 public void appendPath(String p_addToPath) 1584 throws MalformedURIException { 1585 if (p_addToPath == null || p_addToPath.trim().length() == 0) { 1586 return; 1587 } 1588 1589 if (!isURIString(p_addToPath)) { 1590 throw new MalformedURIException( 1591 "Path contains invalid character!"); 1592 } 1593 1594 if (m_path == null || m_path.trim().length() == 0) { 1595 if (p_addToPath.startsWith("/")) { 1596 m_path = p_addToPath; 1597 } 1598 else { 1599 m_path = "/" + p_addToPath; 1600 } 1601 } 1602 else if (m_path.endsWith("/")) { 1603 if (p_addToPath.startsWith("/")) { 1604 m_path = m_path.concat(p_addToPath.substring(1)); 1605 } 1606 else { 1607 m_path = m_path.concat(p_addToPath); 1608 } 1609 } 1610 else { 1611 if (p_addToPath.startsWith("/")) { 1612 m_path = m_path.concat(p_addToPath); 1613 } 1614 else { 1615 m_path = m_path.concat("/" + p_addToPath); 1616 } 1617 } 1618 } 1619 1620 /** 1621 * Set the query string for this URI. A non-null value is valid only 1622 * if this is an URI conforming to the generic URI syntax and 1623 * the path value is not null. 1624 * 1625 * @param p_queryString the query string for this URI 1626 * 1627 * @exception MalformedURIException if p_queryString is not null and this 1628 * URI does not conform to the generic 1629 * URI syntax or if the path is null 1630 */ 1631 public void setQueryString(String p_queryString) throws MalformedURIException { 1632 if (p_queryString == null) { 1633 m_queryString = null; 1634 } 1635 else if (!isGenericURI()) { 1636 throw new MalformedURIException( 1637 "Query string can only be set for a generic URI!"); 1638 } 1639 else if (getPath() == null) { 1640 throw new MalformedURIException( 1641 "Query string cannot be set when path is null!"); 1642 } 1643 else if (!isURIString(p_queryString)) { 1644 throw new MalformedURIException( 1645 "Query string contains invalid character!"); 1646 } 1647 else { 1648 m_queryString = p_queryString; 1649 } 1650 } 1651 1652 /** 1653 * Set the fragment for this URI. A non-null value is valid only 1654 * if this is a URI conforming to the generic URI syntax and 1655 * the path value is not null. 1656 * 1657 * @param p_fragment the fragment for this URI 1658 * 1659 * @exception MalformedURIException if p_fragment is not null and this 1660 * URI does not conform to the generic 1661 * URI syntax or if the path is null 1662 */ 1663 public void setFragment(String p_fragment) throws MalformedURIException { 1664 if (p_fragment == null) { 1665 m_fragment = null; 1666 } 1667 else if (!isGenericURI()) { 1668 throw new MalformedURIException( 1669 "Fragment can only be set for a generic URI!"); 1670 } 1671 else if (getPath() == null) { 1672 throw new MalformedURIException( 1673 "Fragment cannot be set when path is null!"); 1674 } 1675 else if (!isURIString(p_fragment)) { 1676 throw new MalformedURIException( 1677 "Fragment contains invalid character!"); 1678 } 1679 else { 1680 m_fragment = p_fragment; 1681 } 1682 } 1683 1684 /** 1685 * Determines if the passed-in Object is equivalent to this URI. 1686 * 1687 * @param p_test the Object to test for equality. 1688 * 1689 * @return true if p_test is a URI with all values equal to this 1690 * URI, false otherwise 1691 */ 1692 @Override 1693 public boolean equals(Object p_test) { 1694 if (p_test instanceof URI) { 1695 URI testURI = (URI) p_test; 1696 if (((m_scheme == null && testURI.m_scheme == null) || 1697 (m_scheme != null && testURI.m_scheme != null && 1698 m_scheme.equals(testURI.m_scheme))) && 1699 ((m_userinfo == null && testURI.m_userinfo == null) || 1700 (m_userinfo != null && testURI.m_userinfo != null && 1701 m_userinfo.equals(testURI.m_userinfo))) && 1702 ((m_host == null && testURI.m_host == null) || 1703 (m_host != null && testURI.m_host != null && 1704 m_host.equals(testURI.m_host))) && 1705 m_port == testURI.m_port && 1706 ((m_path == null && testURI.m_path == null) || 1707 (m_path != null && testURI.m_path != null && 1708 m_path.equals(testURI.m_path))) && 1709 ((m_queryString == null && testURI.m_queryString == null) || 1710 (m_queryString != null && testURI.m_queryString != null && 1711 m_queryString.equals(testURI.m_queryString))) && 1712 ((m_fragment == null && testURI.m_fragment == null) || 1713 (m_fragment != null && testURI.m_fragment != null && 1714 m_fragment.equals(testURI.m_fragment)))) { 1715 return true; 1716 } 1717 } 1718 return false; 1719 } 1720 1721 @Override 1722 public int hashCode() { 1723 int hash = 5; 1724 hash = 47 * hash + Objects.hashCode(this.m_scheme); 1725 hash = 47 * hash + Objects.hashCode(this.m_userinfo); 1726 hash = 47 * hash + Objects.hashCode(this.m_host); 1727 hash = 47 * hash + this.m_port; 1728 hash = 47 * hash + Objects.hashCode(this.m_path); 1729 hash = 47 * hash + Objects.hashCode(this.m_queryString); 1730 hash = 47 * hash + Objects.hashCode(this.m_fragment); 1731 return hash; 1732 } 1733 1734 /** 1735 * Get the URI as a string specification. See RFC 2396 Section 5.2. 1736 * 1737 * @return the URI string specification 1738 */ 1739 @Override 1740 public String toString() { 1741 final StringBuilder uriSpecString = new StringBuilder(); 1742 1743 if (m_scheme != null) { 1744 uriSpecString.append(m_scheme); 1745 uriSpecString.append(':'); 1746 } 1747 uriSpecString.append(getSchemeSpecificPart()); 1748 return uriSpecString.toString(); 1749 } 1750 1751 /** 1752 * Get the indicator as to whether this URI uses the "generic URI" 1753 * syntax. 1754 * 1755 * @return true if this URI uses the "generic URI" syntax, false 1756 * otherwise 1757 */ 1758 public boolean isGenericURI() { 1759 // presence of the host (whether valid or empty) means 1760 // double-slashes which means generic uri 1761 return (m_host != null); 1762 } 1763 1764 /** 1765 * Returns whether this URI represents an absolute URI. 1766 * 1767 * @return true if this URI represents an absolute URI, false 1768 * otherwise 1769 */ 1770 public boolean isAbsoluteURI() { 1771 // presence of the scheme means absolute uri 1772 return (m_scheme != null); 1773 } 1774 1775 /** 1776 * Determine whether a scheme conforms to the rules for a scheme name. 1777 * A scheme is conformant if it starts with an alphanumeric, and 1778 * contains only alphanumerics, '+','-' and '.'. 1779 * 1780 * @return true if the scheme is conformant, false otherwise 1781 */ 1782 public static boolean isConformantSchemeName(String p_scheme) { 1783 if (p_scheme == null || p_scheme.trim().length() == 0) { 1784 return false; 1785 } 1786 1787 if (!isAlpha(p_scheme.charAt(0))) { 1788 return false; 1789 } 1790 1791 char testChar; 1792 int schemeLength = p_scheme.length(); 1793 for (int i = 1; i < schemeLength; ++i) { 1794 testChar = p_scheme.charAt(i); 1795 if (!isSchemeCharacter(testChar)) { 1796 return false; 1797 } 1798 } 1799 1800 return true; 1801 } 1802 1803 /** 1804 * Determine whether a string is syntactically capable of representing 1805 * a valid IPv4 address, IPv6 reference or the domain name of a network host. 1806 * A valid IPv4 address consists of four decimal digit groups separated by a 1807 * '.'. Each group must consist of one to three digits. See RFC 2732 Section 3, 1808 * and RFC 2373 Section 2.2, for the definition of IPv6 references. A hostname 1809 * consists of domain labels (each of which must begin and end with an alphanumeric 1810 * but may contain '-') separated & by a '.'. See RFC 2396 Section 3.2.2. 1811 * 1812 * @return true if the string is a syntactically valid IPv4 address, 1813 * IPv6 reference or hostname 1814 */ 1815 public static boolean isWellFormedAddress(String address) { 1816 if (address == null) { 1817 return false; 1818 } 1819 1820 int addrLength = address.length(); 1821 if (addrLength == 0) { 1822 return false; 1823 } 1824 1825 // Check if the host is a valid IPv6reference. 1826 if (address.startsWith("[")) { 1827 return isWellFormedIPv6Reference(address); 1828 } 1829 1830 // Cannot start with a '.', '-', or end with a '-'. 1831 if (address.startsWith(".") || 1832 address.startsWith("-") || 1833 address.endsWith("-")) { 1834 return false; 1835 } 1836 1837 // rightmost domain label starting with digit indicates IP address 1838 // since top level domain label can only start with an alpha 1839 // see RFC 2396 Section 3.2.2 1840 int index = address.lastIndexOf('.'); 1841 if (address.endsWith(".")) { 1842 index = address.substring(0, index).lastIndexOf('.'); 1843 } 1844 1845 if (index+1 < addrLength && isDigit(address.charAt(index+1))) { 1846 return isWellFormedIPv4Address(address); 1847 } 1848 else { 1849 // hostname = *( domainlabel "." ) toplabel [ "." ] 1850 // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum 1851 // toplabel = alpha | alpha *( alphanum | "-" ) alphanum 1852 1853 // RFC 2396 states that hostnames take the form described in 1854 // RFC 1034 (Section 3) and RFC 1123 (Section 2.1). According 1855 // to RFC 1034, hostnames are limited to 255 characters. 1856 if (addrLength > 255) { 1857 return false; 1858 } 1859 1860 // domain labels can contain alphanumerics and '-" 1861 // but must start and end with an alphanumeric 1862 char testChar; 1863 int labelCharCount = 0; 1864 1865 for (int i = 0; i < addrLength; i++) { 1866 testChar = address.charAt(i); 1867 if (testChar == '.') { 1868 if (!isAlphanum(address.charAt(i-1))) { 1869 return false; 1870 } 1871 if (i+1 < addrLength && !isAlphanum(address.charAt(i+1))) { 1872 return false; 1873 } 1874 labelCharCount = 0; 1875 } 1876 else if (!isAlphanum(testChar) && testChar != '-') { 1877 return false; 1878 } 1879 // RFC 1034: Labels must be 63 characters or less. 1880 else if (++labelCharCount > 63) { 1881 return false; 1882 } 1883 } 1884 } 1885 return true; 1886 } 1887 1888 /** 1889 * <p>Determines whether a string is an IPv4 address as defined by 1890 * RFC 2373, and under the further constraint that it must be a 32-bit 1891 * address. Though not expressed in the grammar, in order to satisfy 1892 * the 32-bit address constraint, each segment of the address cannot 1893 * be greater than 255 (8 bits of information).</p> 1894 * 1895 * <p><code>IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT</code></p> 1896 * 1897 * @return true if the string is a syntactically valid IPv4 address 1898 */ 1899 public static boolean isWellFormedIPv4Address(String address) { 1900 1901 int addrLength = address.length(); 1902 char testChar; 1903 int numDots = 0; 1904 int numDigits = 0; 1905 1906 // make sure that 1) we see only digits and dot separators, 2) that 1907 // any dot separator is preceded and followed by a digit and 1908 // 3) that we find 3 dots 1909 // 1910 // RFC 2732 amended RFC 2396 by replacing the definition 1911 // of IPv4address with the one defined by RFC 2373. - mrglavas 1912 // 1913 // IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT 1914 // 1915 // One to three digits must be in each segment. 1916 for (int i = 0; i < addrLength; i++) { 1917 testChar = address.charAt(i); 1918 if (testChar == '.') { 1919 if ((i > 0 && !isDigit(address.charAt(i-1))) || 1920 (i+1 < addrLength && !isDigit(address.charAt(i+1)))) { 1921 return false; 1922 } 1923 numDigits = 0; 1924 if (++numDots > 3) { 1925 return false; 1926 } 1927 } 1928 else if (!isDigit(testChar)) { 1929 return false; 1930 } 1931 // Check that that there are no more than three digits 1932 // in this segment. 1933 else if (++numDigits > 3) { 1934 return false; 1935 } 1936 // Check that this segment is not greater than 255. 1937 else if (numDigits == 3) { 1938 char first = address.charAt(i-2); 1939 char second = address.charAt(i-1); 1940 if (!(first < '2' || 1941 (first == '2' && 1942 (second < '5' || 1943 (second == '5' && testChar <= '5'))))) { 1944 return false; 1945 } 1946 } 1947 } 1948 return (numDots == 3); 1949 } 1950 1951 /** 1952 * <p>Determines whether a string is an IPv6 reference as defined 1953 * by RFC 2732, where IPv6address is defined in RFC 2373. The 1954 * IPv6 address is parsed according to Section 2.2 of RFC 2373, 1955 * with the additional constraint that the address be composed of 1956 * 128 bits of information.</p> 1957 * 1958 * <p><code>IPv6reference = "[" IPv6address "]"</code></p> 1959 * 1960 * <p>Note: The BNF expressed in RFC 2373 Appendix B does not 1961 * accurately describe section 2.2, and was in fact removed from 1962 * RFC 3513, the successor of RFC 2373.</p> 1963 * 1964 * @return true if the string is a syntactically valid IPv6 reference 1965 */ 1966 public static boolean isWellFormedIPv6Reference(String address) { 1967 1968 int addrLength = address.length(); 1969 int index = 1; 1970 int end = addrLength-1; 1971 1972 // Check if string is a potential match for IPv6reference. 1973 if (!(addrLength > 2 && address.charAt(0) == '[' 1974 && address.charAt(end) == ']')) { 1975 return false; 1976 } 1977 1978 // Counter for the number of 16-bit sections read in the address. 1979 int [] counter = new int[1]; 1980 1981 // Scan hex sequence before possible '::' or IPv4 address. 1982 index = scanHexSequence(address, index, end, counter); 1983 if (index == -1) { 1984 return false; 1985 } 1986 // Address must contain 128-bits of information. 1987 else if (index == end) { 1988 return (counter[0] == 8); 1989 } 1990 1991 if (index+1 < end && address.charAt(index) == ':') { 1992 if (address.charAt(index+1) == ':') { 1993 // '::' represents at least one 16-bit group of zeros. 1994 if (++counter[0] > 8) { 1995 return false; 1996 } 1997 index += 2; 1998 // Trailing zeros will fill out the rest of the address. 1999 if (index == end) { 2000 return true; 2001 } 2002 } 2003 // If the second character wasn't ':', in order to be valid, 2004 // the remainder of the string must match IPv4Address, 2005 // and we must have read exactly 6 16-bit groups. 2006 else { 2007 return (counter[0] == 6) && 2008 isWellFormedIPv4Address(address.substring(index+1, end)); 2009 } 2010 } 2011 else { 2012 return false; 2013 } 2014 2015 // 3. Scan hex sequence after '::'. 2016 int prevCount = counter[0]; 2017 index = scanHexSequence(address, index, end, counter); 2018 2019 // We've either reached the end of the string, the address ends in 2020 // an IPv4 address, or it is invalid. scanHexSequence has already 2021 // made sure that we have the right number of bits. 2022 return (index == end) || 2023 (index != -1 && isWellFormedIPv4Address( 2024 address.substring((counter[0] > prevCount) ? index+1 : index, end))); 2025 } 2026 2027 /** 2028 * Helper method for isWellFormedIPv6Reference which scans the 2029 * hex sequences of an IPv6 address. It returns the index of the 2030 * next character to scan in the address, or -1 if the string 2031 * cannot match a valid IPv6 address. 2032 * 2033 * @param address the string to be scanned 2034 * @param index the beginning index (inclusive) 2035 * @param end the ending index (exclusive) 2036 * @param counter a counter for the number of 16-bit sections read 2037 * in the address 2038 * 2039 * @return the index of the next character to scan, or -1 if the 2040 * string cannot match a valid IPv6 address 2041 */ 2042 private static int scanHexSequence (String address, int index, int end, int [] counter) { 2043 2044 char testChar; 2045 int numDigits = 0; 2046 int start = index; 2047 2048 // Trying to match the following productions: 2049 // hexseq = hex4 *( ":" hex4) 2050 // hex4 = 1*4HEXDIG 2051 for (; index < end; ++index) { 2052 testChar = address.charAt(index); 2053 if (testChar == ':') { 2054 // IPv6 addresses are 128-bit, so there can be at most eight sections. 2055 if (numDigits > 0 && ++counter[0] > 8) { 2056 return -1; 2057 } 2058 // This could be '::'. 2059 if (numDigits == 0 || ((index+1 < end) && address.charAt(index+1) == ':')) { 2060 return index; 2061 } 2062 numDigits = 0; 2063 } 2064 // This might be invalid or an IPv4address. If it's potentially an IPv4address, 2065 // backup to just after the last valid character that matches hexseq. 2066 else if (!isHex(testChar)) { 2067 if (testChar == '.' && numDigits < 4 && numDigits > 0 && counter[0] <= 6) { 2068 int back = index - numDigits - 1; 2069 return (back >= start) ? back : (back+1); 2070 } 2071 return -1; 2072 } 2073 // There can be at most 4 hex digits per group. 2074 else if (++numDigits > 4) { 2075 return -1; 2076 } 2077 } 2078 return (numDigits > 0 && ++counter[0] <= 8) ? end : -1; 2079 } 2080 2081 2082 /** 2083 * Determine whether a char is a digit. 2084 * 2085 * @return true if the char is betweeen '0' and '9', false otherwise 2086 */ 2087 private static boolean isDigit(char p_char) { 2088 return p_char >= '0' && p_char <= '9'; 2089 } 2090 2091 /** 2092 * Determine whether a character is a hexadecimal character. 2093 * 2094 * @return true if the char is betweeen '0' and '9', 'a' and 'f' 2095 * or 'A' and 'F', false otherwise 2096 */ 2097 private static boolean isHex(char p_char) { 2098 return (p_char <= 'f' && (fgLookupTable[p_char] & ASCII_HEX_CHARACTERS) != 0); 2099 } 2100 2101 /** 2102 * Determine whether a char is an alphabetic character: a-z or A-Z 2103 * 2104 * @return true if the char is alphabetic, false otherwise 2105 */ 2106 private static boolean isAlpha(char p_char) { 2107 return ((p_char >= 'a' && p_char <= 'z') || (p_char >= 'A' && p_char <= 'Z' )); 2108 } 2109 2110 /** 2111 * Determine whether a char is an alphanumeric: 0-9, a-z or A-Z 2112 * 2113 * @return true if the char is alphanumeric, false otherwise 2114 */ 2115 private static boolean isAlphanum(char p_char) { 2116 return (p_char <= 'z' && (fgLookupTable[p_char] & MASK_ALPHA_NUMERIC) != 0); 2117 } 2118 2119 /** 2120 * Determine whether a character is a reserved character: 2121 * ';', '/', '?', ':', '@', '&', '=', '+', '$', ',', '[', or ']' 2122 * 2123 * @return true if the string contains any reserved characters 2124 */ 2125 private static boolean isReservedCharacter(char p_char) { 2126 return (p_char <= ']' && (fgLookupTable[p_char] & RESERVED_CHARACTERS) != 0); 2127 } 2128 2129 /** 2130 * Determine whether a char is an unreserved character. 2131 * 2132 * @return true if the char is unreserved, false otherwise 2133 */ 2134 private static boolean isUnreservedCharacter(char p_char) { 2135 return (p_char <= '~' && (fgLookupTable[p_char] & MASK_UNRESERVED_MASK) != 0); 2136 } 2137 2138 /** 2139 * Determine whether a char is a URI character (reserved or 2140 * unreserved, not including '%' for escaped octets). 2141 * 2142 * @return true if the char is a URI character, false otherwise 2143 */ 2144 private static boolean isURICharacter (char p_char) { 2145 return (p_char <= '~' && (fgLookupTable[p_char] & MASK_URI_CHARACTER) != 0); 2146 } 2147 2148 /** 2149 * Determine whether a char is a scheme character. 2150 * 2151 * @return true if the char is a scheme character, false otherwise 2152 */ 2153 private static boolean isSchemeCharacter (char p_char) { 2154 return (p_char <= 'z' && (fgLookupTable[p_char] & MASK_SCHEME_CHARACTER) != 0); 2155 } 2156 2157 /** 2158 * Determine whether a char is a userinfo character. 2159 * 2160 * @return true if the char is a userinfo character, false otherwise 2161 */ 2162 private static boolean isUserinfoCharacter (char p_char) { 2163 return (p_char <= 'z' && (fgLookupTable[p_char] & MASK_USERINFO_CHARACTER) != 0); 2164 } 2165 2166 /** 2167 * Determine whether a char is a path character. 2168 * 2169 * @return true if the char is a path character, false otherwise 2170 */ 2171 private static boolean isPathCharacter (char p_char) { 2172 return (p_char <= '~' && (fgLookupTable[p_char] & MASK_PATH_CHARACTER) != 0); 2173 } 2174 2175 2176 /** 2177 * Determine whether a given string contains only URI characters (also 2178 * called "uric" in RFC 2396). uric consist of all reserved 2179 * characters, unreserved characters and escaped characters. 2180 * 2181 * @return true if the string is comprised of uric, false otherwise 2182 */ 2183 private static boolean isURIString(String p_uric) { 2184 if (p_uric == null) { 2185 return false; 2186 } 2187 int end = p_uric.length(); 2188 char testChar = '\0'; 2189 for (int i = 0; i < end; i++) { 2190 testChar = p_uric.charAt(i); 2191 if (testChar == '%') { 2192 if (i+2 >= end || 2193 !isHex(p_uric.charAt(i+1)) || 2194 !isHex(p_uric.charAt(i+2))) { 2195 return false; 2196 } 2197 else { 2198 i += 2; 2199 continue; 2200 } 2201 } 2202 if (isURICharacter(testChar)) { 2203 continue; 2204 } 2205 else { 2206 return false; 2207 } 2208 } 2209 return true; 2210 } 2211 }