1 /* 2 * reserved comment block 3 * DO NOT REMOVE OR ALTER! 4 */ 5 /* 6 * Copyright 1999-2005 The Apache Software Foundation. 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xerces.internal.util; 22 23 import com.sun.org.apache.xerces.internal.utils.Objects; 24 import java.io.IOException; 25 import java.io.Serializable; 26 27 /********************************************************************** 28 * A class to represent a Uniform Resource Identifier (URI). This class 29 * is designed to handle the parsing of URIs and provide access to 30 * the various components (scheme, host, port, userinfo, path, query 31 * string and fragment) that may constitute a URI. 32 * <p> 33 * Parsing of a URI specification is done according to the URI 34 * syntax described in 35 * <a href="http://www.ietf.org/rfc/rfc2396.txt?number=2396">RFC 2396</a>, 36 * and amended by 37 * <a href="http://www.ietf.org/rfc/rfc2732.txt?number=2732">RFC 2732</a>. 38 * <p> 39 * Every absolute URI consists of a scheme, followed by a colon (':'), 40 * followed by a scheme-specific part. For URIs that follow the 41 * "generic URI" syntax, the scheme-specific part begins with two 42 * slashes ("//") and may be followed by an authority segment (comprised 43 * of user information, host, and port), path segment, query segment 44 * and fragment. 
Note that RFC 2396 no longer specifies the use of the
* parameters segment and excludes the "user:password" syntax as part of
* the authority segment. If "user:password" appears in a URI, the entire
* user/password string is stored as userinfo.
* <p>
* For URIs that do not follow the "generic URI" syntax (e.g. mailto),
* the entire scheme-specific part is treated as the "path" portion
* of the URI.
* <p>
* Note that, unlike the java.net.URL class, this class does not provide
* any built-in network access functionality nor does it provide any
* scheme-specific functionality (for example, it does not know a
* default port for a specific scheme). Rather, it only knows the
* grammar and basic set of operations that can be applied to a URI.
*
*
**********************************************************************/
public class URI implements Serializable {

  /*******************************************************************
  * MalformedURIExceptions are thrown in the process of building a URI
  * or setting fields on a URI when an operation would result in an
  * invalid URI specification.
  *
  ********************************************************************/
  public static class MalformedURIException extends IOException {

    /** Serialization version. */
    static final long serialVersionUID = -6695054834342951930L;

    /******************************************************************
    * Constructs a <code>MalformedURIException</code> with no specified
    * detail message.
    ******************************************************************/
    public MalformedURIException() {
      super();
    }

    /*****************************************************************
    * Constructs a <code>MalformedURIException</code> with the
    * specified detail message.
    *
    * @param p_msg the detail message.
    ******************************************************************/
    public MalformedURIException(String p_msg) {
      super(p_msg);
    }
  }

  /** Serialization version. */
  static final long serialVersionUID = 1601921774685357214L;

  /**
   * Character-class lookup table with one entry per 7-bit ASCII code
   * (0-127). Each entry is a bit set built from the flag constants
   * declared below and is filled in by the static initializer.
   */
  private static final byte [] fgLookupTable = new byte[128];

  /**
   * Character Classes
   */

  /** reserved characters ;/?:@&=+$,[] */
  //RFC 2732 added '[' and ']' as reserved characters
  private static final int RESERVED_CHARACTERS = 0x01;

  /** URI punctuation mark characters: -_.!~*'() - these, combined with
      alphanumerics, constitute the "unreserved" characters */
  private static final int MARK_CHARACTERS = 0x02;

  /** scheme can be composed of alphanumerics and these characters: +-. */
  private static final int SCHEME_CHARACTERS = 0x04;

  /** userinfo can be composed of unreserved, escaped and these
      characters: ;:&=+$, */
  private static final int USERINFO_CHARACTERS = 0x08;

  /** ASCII letter characters */
  private static final int ASCII_ALPHA_CHARACTERS = 0x10;

  /** ASCII digit characters */
  private static final int ASCII_DIGIT_CHARACTERS = 0x20;

  /** ASCII hex characters */
  private static final int ASCII_HEX_CHARACTERS = 0x40;

  /** Path characters. Note that 0x80 is the sign bit of a byte table
      entry: the compound "|=" assignments below narrow it implicitly, so
      an entry carrying this flag is negative when read back as a byte. */
  private static final int PATH_CHARACTERS = 0x80;

  /** Mask for alpha-numeric characters */
  private static final int MASK_ALPHA_NUMERIC = ASCII_ALPHA_CHARACTERS | ASCII_DIGIT_CHARACTERS;

  /** Mask for unreserved characters */
  private static final int MASK_UNRESERVED_MASK = MASK_ALPHA_NUMERIC | MARK_CHARACTERS;

  /** Mask for URI allowable characters except for % */
  private static final int MASK_URI_CHARACTER = MASK_UNRESERVED_MASK | RESERVED_CHARACTERS;

  /** Mask for scheme characters */
  private static final int MASK_SCHEME_CHARACTER = MASK_ALPHA_NUMERIC | SCHEME_CHARACTERS;

  /** Mask for userinfo characters */
  private static final int MASK_USERINFO_CHARACTER = MASK_UNRESERVED_MASK | USERINFO_CHARACTERS;

  /** Mask for path characters */
  private static final int MASK_PATH_CHARACTER = MASK_UNRESERVED_MASK | PATH_CHARACTERS;

  // Populate fgLookupTable. A character may belong to several classes, so
  // every assignment ORs its flag into whatever is already stored.
  static {
    // Add ASCII Digits and ASCII Hex Numbers
    for (int i = '0'; i <= '9'; ++i) {
      fgLookupTable[i] |= ASCII_DIGIT_CHARACTERS | ASCII_HEX_CHARACTERS;
    }

    // Add ASCII Letters and ASCII Hex Numbers
    // (adding 0x20 to an upper-case ASCII letter yields its lower-case form)
    for (int i = 'A'; i <= 'F'; ++i) {
      fgLookupTable[i] |= ASCII_ALPHA_CHARACTERS | ASCII_HEX_CHARACTERS;
      fgLookupTable[i+0x00000020] |= ASCII_ALPHA_CHARACTERS | ASCII_HEX_CHARACTERS;
    }

    // Add ASCII Letters
    for (int i = 'G'; i <= 'Z'; ++i) {
      fgLookupTable[i] |= ASCII_ALPHA_CHARACTERS;
      fgLookupTable[i+0x00000020] |= ASCII_ALPHA_CHARACTERS;
    }

    // Add Reserved Characters
    fgLookupTable[';'] |= RESERVED_CHARACTERS;
    fgLookupTable['/'] |= RESERVED_CHARACTERS;
    fgLookupTable['?'] |= RESERVED_CHARACTERS;
    fgLookupTable[':'] |= RESERVED_CHARACTERS;
    fgLookupTable['@'] |= RESERVED_CHARACTERS;
    fgLookupTable['&'] |= RESERVED_CHARACTERS;
    fgLookupTable['='] |= RESERVED_CHARACTERS;
    fgLookupTable['+'] |= RESERVED_CHARACTERS;
    fgLookupTable['$'] |= RESERVED_CHARACTERS;
    fgLookupTable[','] |= RESERVED_CHARACTERS;
    fgLookupTable['['] |= RESERVED_CHARACTERS;
    fgLookupTable[']'] |= RESERVED_CHARACTERS;

    // Add Mark Characters
    fgLookupTable['-'] |= MARK_CHARACTERS;
    fgLookupTable['_'] |= MARK_CHARACTERS;
    fgLookupTable['.'] |= MARK_CHARACTERS;
    fgLookupTable['!'] |= MARK_CHARACTERS;
    fgLookupTable['~'] |= MARK_CHARACTERS;
    fgLookupTable['*'] |= MARK_CHARACTERS;
    fgLookupTable['\''] |= MARK_CHARACTERS;
    fgLookupTable['('] |= MARK_CHARACTERS;
    fgLookupTable[')'] |= MARK_CHARACTERS;

    // Add Scheme Characters
    fgLookupTable['+'] |= SCHEME_CHARACTERS;
    fgLookupTable['-'] |= SCHEME_CHARACTERS;
    fgLookupTable['.'] |= SCHEME_CHARACTERS;

    // Add Userinfo Characters
    fgLookupTable[';'] |= USERINFO_CHARACTERS;
    fgLookupTable[':'] |= USERINFO_CHARACTERS;
    fgLookupTable['&'] |= USERINFO_CHARACTERS;
    fgLookupTable['='] |= USERINFO_CHARACTERS;
    fgLookupTable['+'] |= USERINFO_CHARACTERS;
    fgLookupTable['$'] |= USERINFO_CHARACTERS;
    fgLookupTable[','] |= USERINFO_CHARACTERS;

    // Add Path Characters
    // ('?' is deliberately absent: only the reserved characters listed
    // here receive the path flag)
    fgLookupTable[';'] |= PATH_CHARACTERS;
    fgLookupTable['/'] |= PATH_CHARACTERS;
    fgLookupTable[':'] |= PATH_CHARACTERS;
    fgLookupTable['@'] |= PATH_CHARACTERS;
    fgLookupTable['&'] |= PATH_CHARACTERS;
    fgLookupTable['='] |= PATH_CHARACTERS;
    fgLookupTable['+'] |= PATH_CHARACTERS;
    fgLookupTable['$'] |= PATH_CHARACTERS;
    fgLookupTable[','] |= PATH_CHARACTERS;
  }

  /** Stores the scheme (usually the protocol) for this URI. */
  private String m_scheme = null;

  /** If specified, stores the userinfo for this URI; otherwise null */
  private String m_userinfo = null;

  /** If specified, stores the host for this URI; otherwise null */
  private String m_host = null;

  /** If specified, stores the port for this URI; otherwise -1 */
  private int m_port = -1;

  /** If specified, stores the registry based authority for this URI;
      otherwise null */
  private String m_regAuthority = null;

  /** If specified, stores the path for this URI; otherwise null */
  private String m_path = null;

  /** If specified, stores the query string for this URI; otherwise
      null.  */
  private String m_queryString = null;

  /** If specified, stores the fragment for this URI; otherwise null */
  private String m_fragment = null;

  /** Debug switch; off by default. Its readers are not visible in this
      part of the file. */
  private static boolean DEBUG = false;

  /**
  * Construct a new and uninitialized URI. Every component keeps its
  * field default (null, or -1 for the port).
  */
  public URI() {
  }

  /**
   * Construct a new URI from another URI. All fields for this URI are
   * set equal to the fields of the URI passed in.
*
   * @param p_other the URI to copy (cannot be null; it is dereferenced
   *                without a null check)
   */
  public URI(URI p_other) {
    initialize(p_other);
  }

  /**
   * Construct a new URI from a URI specification string. If the
   * specification follows the "generic URI" syntax, (two slashes
   * following the first colon), the specification will be parsed
   * accordingly - setting the scheme, userinfo, host, port, path, query
   * string and fragment fields as necessary. If the specification does
   * not follow the "generic URI" syntax, the specification is parsed
   * into a scheme and scheme-specific part (stored as the path) only.
   * <p>
   * This is the two-argument (base, spec) constructor invoked with a
   * null base URI.
   *
   * @param p_uriSpec the URI specification string (cannot be null or
   *                  empty)
   *
   * @exception MalformedURIException if p_uriSpec violates any syntax
   *                                  rules
   */
  public URI(String p_uriSpec) throws MalformedURIException {
    this((URI)null, p_uriSpec);
  }

  /**
   * Construct a new URI from a URI specification string. If the
   * specification follows the "generic URI" syntax, (two slashes
   * following the first colon), the specification will be parsed
   * accordingly - setting the scheme, userinfo, host, port, path, query
   * string and fragment fields as necessary. If the specification does
   * not follow the "generic URI" syntax, the specification is parsed
   * into a scheme and scheme-specific part (stored as the path) only.
   * If allowNonAbsoluteURI is true and p_uriSpec is not a valid
   * absolute URI, a relative URI is constructed instead of throwing an
   * exception.
   *
   * @param p_uriSpec the URI specification string (cannot be null or
   *                  empty)
   * @param allowNonAbsoluteURI true to permit non-absolute URIs,
   *                            false otherwise.
*
   * @exception MalformedURIException if p_uriSpec violates any syntax
   *                                  rules
   */
  public URI(String p_uriSpec, boolean allowNonAbsoluteURI) throws MalformedURIException {
    // Same as the (base, spec, flag) constructor with no base URI.
    this((URI)null, p_uriSpec, allowNonAbsoluteURI);
  }

  /**
   * Construct a new URI from a base URI and a URI specification string.
   * The URI specification string may be a relative URI, in which case it
   * is resolved against the base. When p_uriSpec is null or empty the
   * new URI is a copy of p_base.
   *
   * @param p_base the base URI (cannot be null if p_uriSpec is null or
   *               empty)
   * @param p_uriSpec the URI specification string (cannot be null or
   *                  empty if p_base is null)
   *
   * @exception MalformedURIException if p_uriSpec violates any syntax
   *                                  rules
   */
  public URI(URI p_base, String p_uriSpec) throws MalformedURIException {
    initialize(p_base, p_uriSpec);
  }

  /**
   * Construct a new URI from a base URI and a URI specification string.
   * The URI specification string may be a relative URI.
   * If allowNonAbsoluteURI is true, p_base is null and p_uriSpec is not
   * a valid absolute URI, a relative URI is constructed instead of
   * throwing an exception.
   *
   * @param p_base the base URI (cannot be null if p_uriSpec is null or
   *               empty, unless allowNonAbsoluteURI is true)
   * @param p_uriSpec the URI specification string (cannot be null or
   *                  empty if p_base is null, unless
   *                  allowNonAbsoluteURI is true)
   * @param allowNonAbsoluteURI true to permit non-absolute URIs,
   *                            false otherwise.
   *
   * @exception MalformedURIException if p_uriSpec violates any syntax
   *                                  rules
   */
  public URI(URI p_base, String p_uriSpec, boolean allowNonAbsoluteURI) throws MalformedURIException {
    initialize(p_base, p_uriSpec, allowNonAbsoluteURI);
  }

  /**
   * Construct a new URI that does not follow the generic URI syntax.
   * Only the scheme and scheme-specific part (stored as the path) are
   * initialized.
342 * 343 * @param p_scheme the URI scheme (cannot be null or empty) 344 * @param p_schemeSpecificPart the scheme-specific part (cannot be 345 * null or empty) 346 * 347 * @exception MalformedURIException if p_scheme violates any 348 * syntax rules 349 */ 350 public URI(String p_scheme, String p_schemeSpecificPart) 351 throws MalformedURIException { 352 if (p_scheme == null || p_scheme.trim().length() == 0) { 353 throw new MalformedURIException( 354 "Cannot construct URI with null/empty scheme!"); 355 } 356 if (p_schemeSpecificPart == null || 357 p_schemeSpecificPart.trim().length() == 0) { 358 throw new MalformedURIException( 359 "Cannot construct URI with null/empty scheme-specific part!"); 360 } 361 setScheme(p_scheme); 362 setPath(p_schemeSpecificPart); 363 } 364 365 /** 366 * Construct a new URI that follows the generic URI syntax from its 367 * component parts. Each component is validated for syntax and some 368 * basic semantic checks are performed as well. See the individual 369 * setter methods for specifics. 370 * 371 * @param p_scheme the URI scheme (cannot be null or empty) 372 * @param p_host the hostname, IPv4 address or IPv6 reference for the URI 373 * @param p_path the URI path - if the path contains '?' 
or '#',
   *               then the query string and/or fragment will be
   *               set from the path; however, if the query and
   *               fragment are specified both in the path and as
   *               separate parameters, an exception is thrown
   * @param p_queryString the URI query string (cannot be specified
   *                      if path is null)
   * @param p_fragment the URI fragment (cannot be specified if path
   *                   is null)
   *
   * @exception MalformedURIException if any of the parameters violates
   *                                  syntax rules or semantic rules
   */
  public URI(String p_scheme, String p_host, String p_path,
             String p_queryString, String p_fragment)
         throws MalformedURIException {
    // Delegate to the full constructor with no userinfo (null) and an
    // unspecified port (-1).
    this(p_scheme, null, p_host, -1, p_path, p_queryString, p_fragment);
  }

  /**
   * Construct a new URI that follows the generic URI syntax from its
   * component parts. Each component is validated for syntax and some
   * basic semantic checks are performed as well.  See the individual
   * setter methods for specifics.
   *
   * @param p_scheme the URI scheme (cannot be null or empty)
   * @param p_userinfo the URI userinfo (cannot be specified if host
   *                   is null)
   * @param p_host the hostname, IPv4 address or IPv6 reference for the URI
   * @param p_port the URI port (may be -1 for "unspecified"; cannot
   *               be specified if host is null)
   * @param p_path the URI path - if the path contains '?'
or '#', 405 * then the query string and/or fragment will be 406 * set from the path; however, if the query and 407 * fragment are specified both in the path and as 408 * separate parameters, an exception is thrown 409 * @param p_queryString the URI query string (cannot be specified 410 * if path is null) 411 * @param p_fragment the URI fragment (cannot be specified if path 412 * is null) 413 * 414 * @exception MalformedURIException if any of the parameters violates 415 * syntax rules or semantic rules 416 */ 417 public URI(String p_scheme, String p_userinfo, 418 String p_host, int p_port, String p_path, 419 String p_queryString, String p_fragment) 420 throws MalformedURIException { 421 if (p_scheme == null || p_scheme.trim().length() == 0) { 422 throw new MalformedURIException("Scheme is required!"); 423 } 424 425 if (p_host == null) { 426 if (p_userinfo != null) { 427 throw new MalformedURIException( 428 "Userinfo may not be specified if host is not specified!"); 429 } 430 if (p_port != -1) { 431 throw new MalformedURIException( 432 "Port may not be specified if host is not specified!"); 433 } 434 } 435 436 if (p_path != null) { 437 if (p_path.indexOf('?') != -1 && p_queryString != null) { 438 throw new MalformedURIException( 439 "Query string cannot be specified in path and query string!"); 440 } 441 442 if (p_path.indexOf('#') != -1 && p_fragment != null) { 443 throw new MalformedURIException( 444 "Fragment cannot be specified in both the path and fragment!"); 445 } 446 } 447 448 setScheme(p_scheme); 449 setHost(p_host); 450 setPort(p_port); 451 setUserinfo(p_userinfo); 452 setPath(p_path); 453 setQueryString(p_queryString); 454 setFragment(p_fragment); 455 } 456 457 /** 458 * Initialize all fields of this URI from another URI. 
459 * 460 * @param p_other the URI to copy (cannot be null) 461 */ 462 private void initialize(URI p_other) { 463 m_scheme = p_other.getScheme(); 464 m_userinfo = p_other.getUserinfo(); 465 m_host = p_other.getHost(); 466 m_port = p_other.getPort(); 467 m_regAuthority = p_other.getRegBasedAuthority(); 468 m_path = p_other.getPath(); 469 m_queryString = p_other.getQueryString(); 470 m_fragment = p_other.getFragment(); 471 } 472 473 /** 474 * Initializes this URI from a base URI and a URI specification string. 475 * See RFC 2396 Section 4 and Appendix B for specifications on parsing 476 * the URI and Section 5 for specifications on resolving relative URIs 477 * and relative paths. 478 * 479 * @param p_base the base URI (may be null if p_uriSpec is an absolute 480 * URI) 481 * @param p_uriSpec the URI spec string which may be an absolute or 482 * relative URI (can only be null/empty if p_base 483 * is not null) 484 * @param allowNonAbsoluteURI true to permit non-absolute URIs, 485 * in case of relative URI, false otherwise. 486 * 487 * @exception MalformedURIException if p_base is null and p_uriSpec 488 * is not an absolute URI or if 489 * p_uriSpec violates syntax rules 490 */ 491 private void initialize(URI p_base, String p_uriSpec, boolean allowNonAbsoluteURI) 492 throws MalformedURIException { 493 494 String uriSpec = p_uriSpec; 495 int uriSpecLen = (uriSpec != null) ? uriSpec.length() : 0; 496 497 if (p_base == null && uriSpecLen == 0) { 498 if (allowNonAbsoluteURI) { 499 m_path = ""; 500 return; 501 } 502 throw new MalformedURIException("Cannot initialize URI with empty parameters."); 503 } 504 505 // just make a copy of the base if spec is empty 506 if (uriSpecLen == 0) { 507 initialize(p_base); 508 return; 509 } 510 511 int index = 0; 512 513 // Check for scheme, which must be before '/', '?' or '#'. 
514 int colonIdx = uriSpec.indexOf(':'); 515 if (colonIdx != -1) { 516 final int searchFrom = colonIdx - 1; 517 // search backwards starting from character before ':'. 518 int slashIdx = uriSpec.lastIndexOf('/', searchFrom); 519 int queryIdx = uriSpec.lastIndexOf('?', searchFrom); 520 int fragmentIdx = uriSpec.lastIndexOf('#', searchFrom); 521 522 if (colonIdx == 0 || slashIdx != -1 || 523 queryIdx != -1 || fragmentIdx != -1) { 524 // A standalone base is a valid URI according to spec 525 if (colonIdx == 0 || (p_base == null && fragmentIdx != 0 && !allowNonAbsoluteURI)) { 526 throw new MalformedURIException("No scheme found in URI."); 527 } 528 } 529 else { 530 initializeScheme(uriSpec); 531 index = m_scheme.length()+1; 532 533 // Neither 'scheme:' or 'scheme:#fragment' are valid URIs. 534 if (colonIdx == uriSpecLen - 1 || uriSpec.charAt(colonIdx+1) == '#') { 535 throw new MalformedURIException("Scheme specific part cannot be empty."); 536 } 537 } 538 } 539 else if (p_base == null && uriSpec.indexOf('#') != 0 && !allowNonAbsoluteURI) { 540 throw new MalformedURIException("No scheme found in URI."); 541 } 542 543 // Two slashes means we may have authority, but definitely means we're either 544 // matching net_path or abs_path. These two productions are ambiguous in that 545 // every net_path (except those containing an IPv6Reference) is an abs_path. 546 // RFC 2396 resolves this ambiguity by applying a greedy left most matching rule. 547 // Try matching net_path first, and if that fails we don't have authority so 548 // then attempt to match abs_path. 
549 // 550 // net_path = "//" authority [ abs_path ] 551 // abs_path = "/" path_segments 552 if (((index+1) < uriSpecLen) && 553 (uriSpec.charAt(index) == '/' && uriSpec.charAt(index+1) == '/')) { 554 index += 2; 555 int startPos = index; 556 557 // Authority will be everything up to path, query or fragment 558 char testChar = '\0'; 559 while (index < uriSpecLen) { 560 testChar = uriSpec.charAt(index); 561 if (testChar == '/' || testChar == '?' || testChar == '#') { 562 break; 563 } 564 index++; 565 } 566 567 // Attempt to parse authority. If the section is an empty string 568 // this is a valid server based authority, so set the host to this 569 // value. 570 if (index > startPos) { 571 // If we didn't find authority we need to back up. Attempt to 572 // match against abs_path next. 573 if (!initializeAuthority(uriSpec.substring(startPos, index))) { 574 index = startPos - 2; 575 } 576 } 577 else { 578 m_host = ""; 579 } 580 } 581 582 initializePath(uriSpec, index); 583 584 // Resolve relative URI to base URI - see RFC 2396 Section 5.2 585 // In some cases, it might make more sense to throw an exception 586 // (when scheme is specified is the string spec and the base URI 587 // is also specified, for example), but we're just following the 588 // RFC specifications 589 if (p_base != null) { 590 absolutize(p_base); 591 } 592 } 593 594 /** 595 * Initializes this URI from a base URI and a URI specification string. 596 * See RFC 2396 Section 4 and Appendix B for specifications on parsing 597 * the URI and Section 5 for specifications on resolving relative URIs 598 * and relative paths. 
599 * 600 * @param p_base the base URI (may be null if p_uriSpec is an absolute 601 * URI) 602 * @param p_uriSpec the URI spec string which may be an absolute or 603 * relative URI (can only be null/empty if p_base 604 * is not null) 605 * 606 * @exception MalformedURIException if p_base is null and p_uriSpec 607 * is not an absolute URI or if 608 * p_uriSpec violates syntax rules 609 */ 610 private void initialize(URI p_base, String p_uriSpec) 611 throws MalformedURIException { 612 613 String uriSpec = p_uriSpec; 614 int uriSpecLen = (uriSpec != null) ? uriSpec.length() : 0; 615 616 if (p_base == null && uriSpecLen == 0) { 617 throw new MalformedURIException( 618 "Cannot initialize URI with empty parameters."); 619 } 620 621 // just make a copy of the base if spec is empty 622 if (uriSpecLen == 0) { 623 initialize(p_base); 624 return; 625 } 626 627 int index = 0; 628 629 // Check for scheme, which must be before '/', '?' or '#'. 630 int colonIdx = uriSpec.indexOf(':'); 631 if (colonIdx != -1) { 632 final int searchFrom = colonIdx - 1; 633 // search backwards starting from character before ':'. 634 int slashIdx = uriSpec.lastIndexOf('/', searchFrom); 635 int queryIdx = uriSpec.lastIndexOf('?', searchFrom); 636 int fragmentIdx = uriSpec.lastIndexOf('#', searchFrom); 637 638 if (colonIdx == 0 || slashIdx != -1 || 639 queryIdx != -1 || fragmentIdx != -1) { 640 // A standalone base is a valid URI according to spec 641 if (colonIdx == 0 || (p_base == null && fragmentIdx != 0)) { 642 throw new MalformedURIException("No scheme found in URI."); 643 } 644 } 645 else { 646 initializeScheme(uriSpec); 647 index = m_scheme.length()+1; 648 649 // Neither 'scheme:' or 'scheme:#fragment' are valid URIs. 
650 if (colonIdx == uriSpecLen - 1 || uriSpec.charAt(colonIdx+1) == '#') { 651 throw new MalformedURIException("Scheme specific part cannot be empty."); 652 } 653 } 654 } 655 else if (p_base == null && uriSpec.indexOf('#') != 0) { 656 throw new MalformedURIException("No scheme found in URI."); 657 } 658 659 // Two slashes means we may have authority, but definitely means we're either 660 // matching net_path or abs_path. These two productions are ambiguous in that 661 // every net_path (except those containing an IPv6Reference) is an abs_path. 662 // RFC 2396 resolves this ambiguity by applying a greedy left most matching rule. 663 // Try matching net_path first, and if that fails we don't have authority so 664 // then attempt to match abs_path. 665 // 666 // net_path = "//" authority [ abs_path ] 667 // abs_path = "/" path_segments 668 if (((index+1) < uriSpecLen) && 669 (uriSpec.charAt(index) == '/' && uriSpec.charAt(index+1) == '/')) { 670 index += 2; 671 int startPos = index; 672 673 // Authority will be everything up to path, query or fragment 674 char testChar = '\0'; 675 while (index < uriSpecLen) { 676 testChar = uriSpec.charAt(index); 677 if (testChar == '/' || testChar == '?' || testChar == '#') { 678 break; 679 } 680 index++; 681 } 682 683 // Attempt to parse authority. If the section is an empty string 684 // this is a valid server based authority, so set the host to this 685 // value. 686 if (index > startPos) { 687 // If we didn't find authority we need to back up. Attempt to 688 // match against abs_path next. 
689 if (!initializeAuthority(uriSpec.substring(startPos, index))) { 690 index = startPos - 2; 691 } 692 } 693 else { 694 m_host = ""; 695 } 696 } 697 698 initializePath(uriSpec, index); 699 700 // Resolve relative URI to base URI - see RFC 2396 Section 5.2 701 // In some cases, it might make more sense to throw an exception 702 // (when scheme is specified is the string spec and the base URI 703 // is also specified, for example), but we're just following the 704 // RFC specifications 705 if (p_base != null) { 706 absolutize(p_base); 707 } 708 } 709 710 /** 711 * Absolutize URI with given base URI. 712 * 713 * @param p_base base URI for absolutization 714 */ 715 public void absolutize(URI p_base) { 716 717 // check to see if this is the current doc - RFC 2396 5.2 #2 718 // note that this is slightly different from the RFC spec in that 719 // we don't include the check for query string being null 720 // - this handles cases where the urispec is just a query 721 // string or a fragment (e.g. 
"?y" or "#s") - 722 // see <http://www.ics.uci.edu/~fielding/url/test1.html> which 723 // identified this as a bug in the RFC 724 if (m_path.length() == 0 && m_scheme == null && 725 m_host == null && m_regAuthority == null) { 726 m_scheme = p_base.getScheme(); 727 m_userinfo = p_base.getUserinfo(); 728 m_host = p_base.getHost(); 729 m_port = p_base.getPort(); 730 m_regAuthority = p_base.getRegBasedAuthority(); 731 m_path = p_base.getPath(); 732 733 if (m_queryString == null) { 734 m_queryString = p_base.getQueryString(); 735 736 if (m_fragment == null) { 737 m_fragment = p_base.getFragment(); 738 } 739 } 740 return; 741 } 742 743 // check for scheme - RFC 2396 5.2 #3 744 // if we found a scheme, it means absolute URI, so we're done 745 if (m_scheme == null) { 746 m_scheme = p_base.getScheme(); 747 } 748 else { 749 return; 750 } 751 752 // check for authority - RFC 2396 5.2 #4 753 // if we found a host, then we've got a network path, so we're done 754 if (m_host == null && m_regAuthority == null) { 755 m_userinfo = p_base.getUserinfo(); 756 m_host = p_base.getHost(); 757 m_port = p_base.getPort(); 758 m_regAuthority = p_base.getRegBasedAuthority(); 759 } 760 else { 761 return; 762 } 763 764 // check for absolute path - RFC 2396 5.2 #5 765 if (m_path.length() > 0 && 766 m_path.startsWith("/")) { 767 return; 768 } 769 770 // if we get to this point, we need to resolve relative path 771 // RFC 2396 5.2 #6 772 String path = ""; 773 String basePath = p_base.getPath(); 774 775 // 6a - get all but the last segment of the base URI path 776 if (basePath != null && basePath.length() > 0) { 777 int lastSlash = basePath.lastIndexOf('/'); 778 if (lastSlash != -1) { 779 path = basePath.substring(0, lastSlash+1); 780 } 781 } 782 else if (m_path.length() > 0) { 783 path = "/"; 784 } 785 786 // 6b - append the relative URI path 787 path = path.concat(m_path); 788 789 // 6c - remove all "./" where "." 
is a complete path segment 790 int index = -1; 791 while ((index = path.indexOf("/./")) != -1) { 792 path = path.substring(0, index+1).concat(path.substring(index+3)); 793 } 794 795 // 6d - remove "." if path ends with "." as a complete path segment 796 if (path.endsWith("/.")) { 797 path = path.substring(0, path.length()-1); 798 } 799 800 // 6e - remove all "<segment>/../" where "<segment>" is a complete 801 // path segment not equal to ".." 802 index = 1; 803 int segIndex = -1; 804 String tempString = null; 805 806 while ((index = path.indexOf("/../", index)) > 0) { 807 tempString = path.substring(0, path.indexOf("/../")); 808 segIndex = tempString.lastIndexOf('/'); 809 if (segIndex != -1) { 810 if (!tempString.substring(segIndex).equals("..")) { 811 path = path.substring(0, segIndex+1).concat(path.substring(index+4)); 812 index = segIndex; 813 } 814 else { 815 index += 4; 816 } 817 } 818 else { 819 index += 4; 820 } 821 } 822 823 // 6f - remove ending "<segment>/.." where "<segment>" is a 824 // complete path segment 825 if (path.endsWith("/..")) { 826 tempString = path.substring(0, path.length()-3); 827 segIndex = tempString.lastIndexOf('/'); 828 if (segIndex != -1) { 829 path = path.substring(0, segIndex+1); 830 } 831 } 832 m_path = path; 833 } 834 835 /** 836 * Initialize the scheme for this URI from a URI string spec. 837 * 838 * @param p_uriSpec the URI specification (cannot be null) 839 * 840 * @exception MalformedURIException if URI does not have a conformant 841 * scheme 842 */ 843 private void initializeScheme(String p_uriSpec) 844 throws MalformedURIException { 845 int uriSpecLen = p_uriSpec.length(); 846 int index = 0; 847 String scheme = null; 848 char testChar = '\0'; 849 850 while (index < uriSpecLen) { 851 testChar = p_uriSpec.charAt(index); 852 if (testChar == ':' || testChar == '/' || 853 testChar == '?' 
|| testChar == '#') { 854 break; 855 } 856 index++; 857 } 858 scheme = p_uriSpec.substring(0, index); 859 860 if (scheme.length() == 0) { 861 throw new MalformedURIException("No scheme found in URI."); 862 } 863 else { 864 setScheme(scheme); 865 } 866 } 867 868 /** 869 * Initialize the authority (either server or registry based) 870 * for this URI from a URI string spec. 871 * 872 * @param p_uriSpec the URI specification (cannot be null) 873 * 874 * @return true if the given string matched server or registry 875 * based authority 876 */ 877 private boolean initializeAuthority(String p_uriSpec) { 878 879 int index = 0; 880 int start = 0; 881 int end = p_uriSpec.length(); 882 883 char testChar = '\0'; 884 String userinfo = null; 885 886 // userinfo is everything up to @ 887 if (p_uriSpec.indexOf('@', start) != -1) { 888 while (index < end) { 889 testChar = p_uriSpec.charAt(index); 890 if (testChar == '@') { 891 break; 892 } 893 index++; 894 } 895 userinfo = p_uriSpec.substring(start, index); 896 index++; 897 } 898 899 // host is everything up to last ':', or up to 900 // and including ']' if followed by ':'. 901 String host = null; 902 start = index; 903 boolean hasPort = false; 904 if (index < end) { 905 if (p_uriSpec.charAt(start) == '[') { 906 int bracketIndex = p_uriSpec.indexOf(']', start); 907 index = (bracketIndex != -1) ? bracketIndex : end; 908 if (index+1 < end && p_uriSpec.charAt(index+1) == ':') { 909 ++index; 910 hasPort = true; 911 } 912 else { 913 index = end; 914 } 915 } 916 else { 917 int colonIndex = p_uriSpec.lastIndexOf(':', end); 918 index = (colonIndex > start) ? colonIndex : end; 919 hasPort = (index != end); 920 } 921 } 922 host = p_uriSpec.substring(start, index); 923 int port = -1; 924 if (host.length() > 0) { 925 // port 926 if (hasPort) { 927 index++; 928 start = index; 929 while (index < end) { 930 index++; 931 } 932 String portStr = p_uriSpec.substring(start, index); 933 if (portStr.length() > 0) { 934 // REVISIT: Remove this code. 
935 /** for (int i = 0; i < portStr.length(); i++) { 936 if (!isDigit(portStr.charAt(i))) { 937 throw new MalformedURIException( 938 portStr + 939 " is invalid. Port should only contain digits!"); 940 } 941 }**/ 942 // REVISIT: Remove this code. 943 // Store port value as string instead of integer. 944 try { 945 port = Integer.parseInt(portStr); 946 if (port == -1) --port; 947 } 948 catch (NumberFormatException nfe) { 949 port = -2; 950 } 951 } 952 } 953 } 954 955 if (isValidServerBasedAuthority(host, port, userinfo)) { 956 m_host = host; 957 m_port = port; 958 m_userinfo = userinfo; 959 return true; 960 } 961 // Note: Registry based authority is being removed from a 962 // new spec for URI which would obsolete RFC 2396. If the 963 // spec is added to XML errata, processing of reg_name 964 // needs to be removed. - mrglavas. 965 else if (isValidRegistryBasedAuthority(p_uriSpec)) { 966 m_regAuthority = p_uriSpec; 967 return true; 968 } 969 return false; 970 } 971 972 /** 973 * Determines whether the components host, port, and user info 974 * are valid as a server authority. 975 * 976 * @param host the host component of authority 977 * @param port the port number component of authority 978 * @param userinfo the user info component of authority 979 * 980 * @return true if the given host, port, and userinfo compose 981 * a valid server authority 982 */ 983 private boolean isValidServerBasedAuthority(String host, int port, String userinfo) { 984 985 // Check if the host is well formed. 986 if (!isWellFormedAddress(host)) { 987 return false; 988 } 989 990 // Check that port is well formed if it exists. 991 // REVISIT: There's no restriction on port value ranges, but 992 // perform the same check as in setPort to be consistent. Pass 993 // in a string to this method instead of an integer. 994 if (port < -1 || port > 65535) { 995 return false; 996 } 997 998 // Check that userinfo is well formed if it exists. 
999 if (userinfo != null) { 1000 // Userinfo can contain alphanumerics, mark characters, escaped 1001 // and ';',':','&','=','+','$',',' 1002 int index = 0; 1003 int end = userinfo.length(); 1004 char testChar = '\0'; 1005 while (index < end) { 1006 testChar = userinfo.charAt(index); 1007 if (testChar == '%') { 1008 if (index+2 >= end || 1009 !isHex(userinfo.charAt(index+1)) || 1010 !isHex(userinfo.charAt(index+2))) { 1011 return false; 1012 } 1013 index += 2; 1014 } 1015 else if (!isUserinfoCharacter(testChar)) { 1016 return false; 1017 } 1018 ++index; 1019 } 1020 } 1021 return true; 1022 } 1023 1024 /** 1025 * Determines whether the given string is a registry based authority. 1026 * 1027 * @param authority the authority component of a URI 1028 * 1029 * @return true if the given string is a registry based authority 1030 */ 1031 private boolean isValidRegistryBasedAuthority(String authority) { 1032 int index = 0; 1033 int end = authority.length(); 1034 char testChar; 1035 1036 while (index < end) { 1037 testChar = authority.charAt(index); 1038 1039 // check for valid escape sequence 1040 if (testChar == '%') { 1041 if (index+2 >= end || 1042 !isHex(authority.charAt(index+1)) || 1043 !isHex(authority.charAt(index+2))) { 1044 return false; 1045 } 1046 index += 2; 1047 } 1048 // can check against path characters because the set 1049 // is the same except for '/' which we've already excluded. 1050 else if (!isPathCharacter(testChar)) { 1051 return false; 1052 } 1053 ++index; 1054 } 1055 return true; 1056 } 1057 1058 /** 1059 * Initialize the path for this URI from a URI string spec. 
1060 * 1061 * @param p_uriSpec the URI specification (cannot be null) 1062 * @param p_nStartIndex the index to begin scanning from 1063 * 1064 * @exception MalformedURIException if p_uriSpec violates syntax rules 1065 */ 1066 private void initializePath(String p_uriSpec, int p_nStartIndex) 1067 throws MalformedURIException { 1068 if (p_uriSpec == null) { 1069 throw new MalformedURIException( 1070 "Cannot initialize path from null string!"); 1071 } 1072 1073 int index = p_nStartIndex; 1074 int start = p_nStartIndex; 1075 int end = p_uriSpec.length(); 1076 char testChar = '\0'; 1077 1078 // path - everything up to query string or fragment 1079 if (start < end) { 1080 // RFC 2732 only allows '[' and ']' to appear in the opaque part. 1081 if (getScheme() == null || p_uriSpec.charAt(start) == '/') { 1082 1083 // Scan path. 1084 // abs_path = "/" path_segments 1085 // rel_path = rel_segment [ abs_path ] 1086 while (index < end) { 1087 testChar = p_uriSpec.charAt(index); 1088 1089 // check for valid escape sequence 1090 if (testChar == '%') { 1091 if (index+2 >= end || 1092 !isHex(p_uriSpec.charAt(index+1)) || 1093 !isHex(p_uriSpec.charAt(index+2))) { 1094 throw new MalformedURIException( 1095 "Path contains invalid escape sequence!"); 1096 } 1097 index += 2; 1098 } 1099 // Path segments cannot contain '[' or ']' since pchar 1100 // production was not changed by RFC 2732. 1101 else if (!isPathCharacter(testChar)) { 1102 if (testChar == '?' || testChar == '#') { 1103 break; 1104 } 1105 throw new MalformedURIException( 1106 "Path contains invalid character: " + testChar); 1107 } 1108 ++index; 1109 } 1110 } 1111 else { 1112 1113 // Scan opaque part. 1114 // opaque_part = uric_no_slash *uric 1115 while (index < end) { 1116 testChar = p_uriSpec.charAt(index); 1117 1118 if (testChar == '?' 
|| testChar == '#') { 1119 break; 1120 } 1121 1122 // check for valid escape sequence 1123 if (testChar == '%') { 1124 if (index+2 >= end || 1125 !isHex(p_uriSpec.charAt(index+1)) || 1126 !isHex(p_uriSpec.charAt(index+2))) { 1127 throw new MalformedURIException( 1128 "Opaque part contains invalid escape sequence!"); 1129 } 1130 index += 2; 1131 } 1132 // If the scheme specific part is opaque, it can contain '[' 1133 // and ']'. uric_no_slash wasn't modified by RFC 2732, which 1134 // I've interpreted as an error in the spec, since the 1135 // production should be equivalent to (uric - '/'), and uric 1136 // contains '[' and ']'. - mrglavas 1137 else if (!isURICharacter(testChar)) { 1138 throw new MalformedURIException( 1139 "Opaque part contains invalid character: " + testChar); 1140 } 1141 ++index; 1142 } 1143 } 1144 } 1145 m_path = p_uriSpec.substring(start, index); 1146 1147 // query - starts with ? and up to fragment or end 1148 if (testChar == '?') { 1149 index++; 1150 start = index; 1151 while (index < end) { 1152 testChar = p_uriSpec.charAt(index); 1153 if (testChar == '#') { 1154 break; 1155 } 1156 if (testChar == '%') { 1157 if (index+2 >= end || 1158 !isHex(p_uriSpec.charAt(index+1)) || 1159 !isHex(p_uriSpec.charAt(index+2))) { 1160 throw new MalformedURIException( 1161 "Query string contains invalid escape sequence!"); 1162 } 1163 index += 2; 1164 } 1165 else if (!isURICharacter(testChar)) { 1166 throw new MalformedURIException( 1167 "Query string contains invalid character: " + testChar); 1168 } 1169 index++; 1170 } 1171 m_queryString = p_uriSpec.substring(start, index); 1172 } 1173 1174 // fragment - starts with # 1175 if (testChar == '#') { 1176 index++; 1177 start = index; 1178 while (index < end) { 1179 testChar = p_uriSpec.charAt(index); 1180 1181 if (testChar == '%') { 1182 if (index+2 >= end || 1183 !isHex(p_uriSpec.charAt(index+1)) || 1184 !isHex(p_uriSpec.charAt(index+2))) { 1185 throw new MalformedURIException( 1186 "Fragment contains invalid 
escape sequence!"); 1187 } 1188 index += 2; 1189 } 1190 else if (!isURICharacter(testChar)) { 1191 throw new MalformedURIException( 1192 "Fragment contains invalid character: "+testChar); 1193 } 1194 index++; 1195 } 1196 m_fragment = p_uriSpec.substring(start, index); 1197 } 1198 } 1199 1200 /** 1201 * Get the scheme for this URI. 1202 * 1203 * @return the scheme for this URI 1204 */ 1205 public String getScheme() { 1206 return m_scheme; 1207 } 1208 1209 /** 1210 * Get the scheme-specific part for this URI (everything following the 1211 * scheme and the first colon). See RFC 2396 Section 5.2 for spec. 1212 * 1213 * @return the scheme-specific part for this URI 1214 */ 1215 public String getSchemeSpecificPart() { 1216 final StringBuilder schemespec = new StringBuilder(); 1217 1218 if (m_host != null || m_regAuthority != null) { 1219 schemespec.append("//"); 1220 1221 // Server based authority. 1222 if (m_host != null) { 1223 1224 if (m_userinfo != null) { 1225 schemespec.append(m_userinfo); 1226 schemespec.append('@'); 1227 } 1228 1229 schemespec.append(m_host); 1230 1231 if (m_port != -1) { 1232 schemespec.append(':'); 1233 schemespec.append(m_port); 1234 } 1235 } 1236 // Registry based authority. 1237 else { 1238 schemespec.append(m_regAuthority); 1239 } 1240 } 1241 1242 if (m_path != null) { 1243 schemespec.append((m_path)); 1244 } 1245 1246 if (m_queryString != null) { 1247 schemespec.append('?'); 1248 schemespec.append(m_queryString); 1249 } 1250 1251 if (m_fragment != null) { 1252 schemespec.append('#'); 1253 schemespec.append(m_fragment); 1254 } 1255 1256 return schemespec.toString(); 1257 } 1258 1259 /** 1260 * Get the userinfo for this URI. 1261 * 1262 * @return the userinfo for this URI (null if not specified). 1263 */ 1264 public String getUserinfo() { 1265 return m_userinfo; 1266 } 1267 1268 /** 1269 * Get the host for this URI. 1270 * 1271 * @return the host for this URI (null if not specified). 
1272 */ 1273 public String getHost() { 1274 return m_host; 1275 } 1276 1277 /** 1278 * Get the port for this URI. 1279 * 1280 * @return the port for this URI (-1 if not specified). 1281 */ 1282 public int getPort() { 1283 return m_port; 1284 } 1285 1286 /** 1287 * Get the registry based authority for this URI. 1288 * 1289 * @return the registry based authority (null if not specified). 1290 */ 1291 public String getRegBasedAuthority() { 1292 return m_regAuthority; 1293 } 1294 1295 /** 1296 * Get the authority for this URI. 1297 * 1298 * @return the authority 1299 */ 1300 public String getAuthority() { 1301 final StringBuilder authority = new StringBuilder(); 1302 if (m_host != null || m_regAuthority != null) { 1303 authority.append("//"); 1304 1305 // Server based authority. 1306 if (m_host != null) { 1307 1308 if (m_userinfo != null) { 1309 authority.append(m_userinfo); 1310 authority.append('@'); 1311 } 1312 1313 authority.append(m_host); 1314 1315 if (m_port != -1) { 1316 authority.append(':'); 1317 authority.append(m_port); 1318 } 1319 } 1320 // Registry based authority. 1321 else { 1322 authority.append(m_regAuthority); 1323 } 1324 } 1325 return authority.toString(); 1326 } 1327 1328 /** 1329 * Get the path for this URI (optionally with the query string and 1330 * fragment). 1331 * 1332 * @param p_includeQueryString if true (and query string is not null), 1333 * then a "?" 
followed by the query string 1334 * will be appended 1335 * @param p_includeFragment if true (and fragment is not null), 1336 * then a "#" followed by the fragment 1337 * will be appended 1338 * 1339 * @return the path for this URI possibly including the query string 1340 * and fragment 1341 */ 1342 public String getPath(boolean p_includeQueryString, 1343 boolean p_includeFragment) { 1344 final StringBuilder pathString = new StringBuilder(m_path); 1345 1346 if (p_includeQueryString && m_queryString != null) { 1347 pathString.append('?'); 1348 pathString.append(m_queryString); 1349 } 1350 1351 if (p_includeFragment && m_fragment != null) { 1352 pathString.append('#'); 1353 pathString.append(m_fragment); 1354 } 1355 return pathString.toString(); 1356 } 1357 1358 /** 1359 * Get the path for this URI. Note that the value returned is the path 1360 * only and does not include the query string or fragment. 1361 * 1362 * @return the path for this URI. 1363 */ 1364 public String getPath() { 1365 return m_path; 1366 } 1367 1368 /** 1369 * Get the query string for this URI. 1370 * 1371 * @return the query string for this URI. Null is returned if there 1372 * was no "?" in the URI spec, empty string if there was a 1373 * "?" but no query string following it. 1374 */ 1375 public String getQueryString() { 1376 return m_queryString; 1377 } 1378 1379 /** 1380 * Get the fragment for this URI. 1381 * 1382 * @return the fragment for this URI. Null is returned if there 1383 * was no "#" in the URI spec, empty string if there was a 1384 * "#" but no fragment following it. 1385 */ 1386 public String getFragment() { 1387 return m_fragment; 1388 } 1389 1390 /** 1391 * Set the scheme for this URI. The scheme is converted to lowercase 1392 * before it is set. 
1393 * 1394 * @param p_scheme the scheme for this URI (cannot be null) 1395 * 1396 * @exception MalformedURIException if p_scheme is not a conformant 1397 * scheme name 1398 */ 1399 public void setScheme(String p_scheme) throws MalformedURIException { 1400 if (p_scheme == null) { 1401 throw new MalformedURIException( 1402 "Cannot set scheme from null string!"); 1403 } 1404 if (!isConformantSchemeName(p_scheme)) { 1405 throw new MalformedURIException("The scheme is not conformant."); 1406 } 1407 1408 m_scheme = p_scheme.toLowerCase(); 1409 } 1410 1411 /** 1412 * Set the userinfo for this URI. If a non-null value is passed in and 1413 * the host value is null, then an exception is thrown. 1414 * 1415 * @param p_userinfo the userinfo for this URI 1416 * 1417 * @exception MalformedURIException if p_userinfo contains invalid 1418 * characters 1419 */ 1420 public void setUserinfo(String p_userinfo) throws MalformedURIException { 1421 if (p_userinfo == null) { 1422 m_userinfo = null; 1423 return; 1424 } 1425 else { 1426 if (m_host == null) { 1427 throw new MalformedURIException( 1428 "Userinfo cannot be set when host is null!"); 1429 } 1430 1431 // userinfo can contain alphanumerics, mark characters, escaped 1432 // and ';',':','&','=','+','$',',' 1433 int index = 0; 1434 int end = p_userinfo.length(); 1435 char testChar = '\0'; 1436 while (index < end) { 1437 testChar = p_userinfo.charAt(index); 1438 if (testChar == '%') { 1439 if (index+2 >= end || 1440 !isHex(p_userinfo.charAt(index+1)) || 1441 !isHex(p_userinfo.charAt(index+2))) { 1442 throw new MalformedURIException( 1443 "Userinfo contains invalid escape sequence!"); 1444 } 1445 } 1446 else if (!isUserinfoCharacter(testChar)) { 1447 throw new MalformedURIException( 1448 "Userinfo contains invalid character:"+testChar); 1449 } 1450 index++; 1451 } 1452 } 1453 m_userinfo = p_userinfo; 1454 } 1455 1456 /** 1457 * <p>Set the host for this URI. 
If null is passed in, the userinfo 1458 * field is also set to null and the port is set to -1.</p> 1459 * 1460 * <p>Note: This method overwrites registry based authority if it 1461 * previously existed in this URI.</p> 1462 * 1463 * @param p_host the host for this URI 1464 * 1465 * @exception MalformedURIException if p_host is not a valid IP 1466 * address or DNS hostname. 1467 */ 1468 public void setHost(String p_host) throws MalformedURIException { 1469 if (p_host == null || p_host.length() == 0) { 1470 if (p_host != null) { 1471 m_regAuthority = null; 1472 } 1473 m_host = p_host; 1474 m_userinfo = null; 1475 m_port = -1; 1476 return; 1477 } 1478 else if (!isWellFormedAddress(p_host)) { 1479 throw new MalformedURIException("Host is not a well formed address!"); 1480 } 1481 m_host = p_host; 1482 m_regAuthority = null; 1483 } 1484 1485 /** 1486 * Set the port for this URI. -1 is used to indicate that the port is 1487 * not specified, otherwise valid port numbers are between 0 and 65535. 1488 * If a valid port number is passed in and the host field is null, 1489 * an exception is thrown. 
1490 * 1491 * @param p_port the port number for this URI 1492 * 1493 * @exception MalformedURIException if p_port is not -1 and not a 1494 * valid port number 1495 */ 1496 public void setPort(int p_port) throws MalformedURIException { 1497 if (p_port >= 0 && p_port <= 65535) { 1498 if (m_host == null) { 1499 throw new MalformedURIException( 1500 "Port cannot be set when host is null!"); 1501 } 1502 } 1503 else if (p_port != -1) { 1504 throw new MalformedURIException("Invalid port number!"); 1505 } 1506 m_port = p_port; 1507 } 1508 1509 /** 1510 * <p>Sets the registry based authority for this URI.</p> 1511 * 1512 * <p>Note: This method overwrites server based authority 1513 * if it previously existed in this URI.</p> 1514 * 1515 * @param authority the registry based authority for this URI 1516 * 1517 * @exception MalformedURIException it authority is not a 1518 * well formed registry based authority 1519 */ 1520 public void setRegBasedAuthority(String authority) 1521 throws MalformedURIException { 1522 1523 if (authority == null) { 1524 m_regAuthority = null; 1525 return; 1526 } 1527 // reg_name = 1*( unreserved | escaped | "$" | "," | 1528 // ";" | ":" | "@" | "&" | "=" | "+" ) 1529 else if (authority.length() < 1 || 1530 !isValidRegistryBasedAuthority(authority) || 1531 authority.indexOf('/') != -1) { 1532 throw new MalformedURIException("Registry based authority is not well formed."); 1533 } 1534 m_regAuthority = authority; 1535 m_host = null; 1536 m_userinfo = null; 1537 m_port = -1; 1538 } 1539 1540 /** 1541 * Set the path for this URI. If the supplied path is null, then the 1542 * query string and fragment are set to null as well. If the supplied 1543 * path includes a query string and/or fragment, these fields will be 1544 * parsed and set as well. Note that, for URIs following the "generic 1545 * URI" syntax, the path specified should start with a slash. 
1546 * For URIs that do not follow the generic URI syntax, this method 1547 * sets the scheme-specific part. 1548 * 1549 * @param p_path the path for this URI (may be null) 1550 * 1551 * @exception MalformedURIException if p_path contains invalid 1552 * characters 1553 */ 1554 public void setPath(String p_path) throws MalformedURIException { 1555 if (p_path == null) { 1556 m_path = null; 1557 m_queryString = null; 1558 m_fragment = null; 1559 } 1560 else { 1561 initializePath(p_path, 0); 1562 } 1563 } 1564 1565 /** 1566 * Append to the end of the path of this URI. If the current path does 1567 * not end in a slash and the path to be appended does not begin with 1568 * a slash, a slash will be appended to the current path before the 1569 * new segment is added. Also, if the current path ends in a slash 1570 * and the new segment begins with a slash, the extra slash will be 1571 * removed before the new segment is appended. 1572 * 1573 * @param p_addToPath the new segment to be added to the current path 1574 * 1575 * @exception MalformedURIException if p_addToPath contains syntax 1576 * errors 1577 */ 1578 public void appendPath(String p_addToPath) 1579 throws MalformedURIException { 1580 if (p_addToPath == null || p_addToPath.trim().length() == 0) { 1581 return; 1582 } 1583 1584 if (!isURIString(p_addToPath)) { 1585 throw new MalformedURIException( 1586 "Path contains invalid character!"); 1587 } 1588 1589 if (m_path == null || m_path.trim().length() == 0) { 1590 if (p_addToPath.startsWith("/")) { 1591 m_path = p_addToPath; 1592 } 1593 else { 1594 m_path = "/" + p_addToPath; 1595 } 1596 } 1597 else if (m_path.endsWith("/")) { 1598 if (p_addToPath.startsWith("/")) { 1599 m_path = m_path.concat(p_addToPath.substring(1)); 1600 } 1601 else { 1602 m_path = m_path.concat(p_addToPath); 1603 } 1604 } 1605 else { 1606 if (p_addToPath.startsWith("/")) { 1607 m_path = m_path.concat(p_addToPath); 1608 } 1609 else { 1610 m_path = m_path.concat("/" + p_addToPath); 1611 } 1612 } 
1613 } 1614 1615 /** 1616 * Set the query string for this URI. A non-null value is valid only 1617 * if this is an URI conforming to the generic URI syntax and 1618 * the path value is not null. 1619 * 1620 * @param p_queryString the query string for this URI 1621 * 1622 * @exception MalformedURIException if p_queryString is not null and this 1623 * URI does not conform to the generic 1624 * URI syntax or if the path is null 1625 */ 1626 public void setQueryString(String p_queryString) throws MalformedURIException { 1627 if (p_queryString == null) { 1628 m_queryString = null; 1629 } 1630 else if (!isGenericURI()) { 1631 throw new MalformedURIException( 1632 "Query string can only be set for a generic URI!"); 1633 } 1634 else if (getPath() == null) { 1635 throw new MalformedURIException( 1636 "Query string cannot be set when path is null!"); 1637 } 1638 else if (!isURIString(p_queryString)) { 1639 throw new MalformedURIException( 1640 "Query string contains invalid character!"); 1641 } 1642 else { 1643 m_queryString = p_queryString; 1644 } 1645 } 1646 1647 /** 1648 * Set the fragment for this URI. A non-null value is valid only 1649 * if this is a URI conforming to the generic URI syntax and 1650 * the path value is not null. 
1651 * 1652 * @param p_fragment the fragment for this URI 1653 * 1654 * @exception MalformedURIException if p_fragment is not null and this 1655 * URI does not conform to the generic 1656 * URI syntax or if the path is null 1657 */ 1658 public void setFragment(String p_fragment) throws MalformedURIException { 1659 if (p_fragment == null) { 1660 m_fragment = null; 1661 } 1662 else if (!isGenericURI()) { 1663 throw new MalformedURIException( 1664 "Fragment can only be set for a generic URI!"); 1665 } 1666 else if (getPath() == null) { 1667 throw new MalformedURIException( 1668 "Fragment cannot be set when path is null!"); 1669 } 1670 else if (!isURIString(p_fragment)) { 1671 throw new MalformedURIException( 1672 "Fragment contains invalid character!"); 1673 } 1674 else { 1675 m_fragment = p_fragment; 1676 } 1677 } 1678 1679 /** 1680 * Determines if the passed-in Object is equivalent to this URI. 1681 * 1682 * @param p_test the Object to test for equality. 1683 * 1684 * @return true if p_test is a URI with all values equal to this 1685 * URI, false otherwise 1686 */ 1687 @Override 1688 public boolean equals(Object p_test) { 1689 if (p_test instanceof URI) { 1690 URI testURI = (URI) p_test; 1691 if (((m_scheme == null && testURI.m_scheme == null) || 1692 (m_scheme != null && testURI.m_scheme != null && 1693 m_scheme.equals(testURI.m_scheme))) && 1694 ((m_userinfo == null && testURI.m_userinfo == null) || 1695 (m_userinfo != null && testURI.m_userinfo != null && 1696 m_userinfo.equals(testURI.m_userinfo))) && 1697 ((m_host == null && testURI.m_host == null) || 1698 (m_host != null && testURI.m_host != null && 1699 m_host.equals(testURI.m_host))) && 1700 m_port == testURI.m_port && 1701 ((m_path == null && testURI.m_path == null) || 1702 (m_path != null && testURI.m_path != null && 1703 m_path.equals(testURI.m_path))) && 1704 ((m_queryString == null && testURI.m_queryString == null) || 1705 (m_queryString != null && testURI.m_queryString != null && 1706 
m_queryString.equals(testURI.m_queryString))) && 1707 ((m_fragment == null && testURI.m_fragment == null) || 1708 (m_fragment != null && testURI.m_fragment != null && 1709 m_fragment.equals(testURI.m_fragment)))) { 1710 return true; 1711 } 1712 } 1713 return false; 1714 } 1715 1716 @Override 1717 public int hashCode() { 1718 int hash = 5; 1719 hash = 47 * hash + Objects.hashCode(this.m_scheme); 1720 hash = 47 * hash + Objects.hashCode(this.m_userinfo); 1721 hash = 47 * hash + Objects.hashCode(this.m_host); 1722 hash = 47 * hash + this.m_port; 1723 hash = 47 * hash + Objects.hashCode(this.m_path); 1724 hash = 47 * hash + Objects.hashCode(this.m_queryString); 1725 hash = 47 * hash + Objects.hashCode(this.m_fragment); 1726 return hash; 1727 } 1728 1729 /** 1730 * Get the URI as a string specification. See RFC 2396 Section 5.2. 1731 * 1732 * @return the URI string specification 1733 */ 1734 @Override 1735 public String toString() { 1736 final StringBuilder uriSpecString = new StringBuilder(); 1737 1738 if (m_scheme != null) { 1739 uriSpecString.append(m_scheme); 1740 uriSpecString.append(':'); 1741 } 1742 uriSpecString.append(getSchemeSpecificPart()); 1743 return uriSpecString.toString(); 1744 } 1745 1746 /** 1747 * Get the indicator as to whether this URI uses the "generic URI" 1748 * syntax. 1749 * 1750 * @return true if this URI uses the "generic URI" syntax, false 1751 * otherwise 1752 */ 1753 public boolean isGenericURI() { 1754 // presence of the host (whether valid or empty) means 1755 // double-slashes which means generic uri 1756 return (m_host != null); 1757 } 1758 1759 /** 1760 * Returns whether this URI represents an absolute URI. 1761 * 1762 * @return true if this URI represents an absolute URI, false 1763 * otherwise 1764 */ 1765 public boolean isAbsoluteURI() { 1766 // presence of the scheme means absolute uri 1767 return (m_scheme != null); 1768 } 1769 1770 /** 1771 * Determine whether a scheme conforms to the rules for a scheme name. 
1772 * A scheme is conformant if it starts with an alphanumeric, and 1773 * contains only alphanumerics, '+','-' and '.'. 1774 * 1775 * @return true if the scheme is conformant, false otherwise 1776 */ 1777 public static boolean isConformantSchemeName(String p_scheme) { 1778 if (p_scheme == null || p_scheme.trim().length() == 0) { 1779 return false; 1780 } 1781 1782 if (!isAlpha(p_scheme.charAt(0))) { 1783 return false; 1784 } 1785 1786 char testChar; 1787 int schemeLength = p_scheme.length(); 1788 for (int i = 1; i < schemeLength; ++i) { 1789 testChar = p_scheme.charAt(i); 1790 if (!isSchemeCharacter(testChar)) { 1791 return false; 1792 } 1793 } 1794 1795 return true; 1796 } 1797 1798 /** 1799 * Determine whether a string is syntactically capable of representing 1800 * a valid IPv4 address, IPv6 reference or the domain name of a network host. 1801 * A valid IPv4 address consists of four decimal digit groups separated by a 1802 * '.'. Each group must consist of one to three digits. See RFC 2732 Section 3, 1803 * and RFC 2373 Section 2.2, for the definition of IPv6 references. A hostname 1804 * consists of domain labels (each of which must begin and end with an alphanumeric 1805 * but may contain '-') separated & by a '.'. See RFC 2396 Section 3.2.2. 1806 * 1807 * @return true if the string is a syntactically valid IPv4 address, 1808 * IPv6 reference or hostname 1809 */ 1810 public static boolean isWellFormedAddress(String address) { 1811 if (address == null) { 1812 return false; 1813 } 1814 1815 int addrLength = address.length(); 1816 if (addrLength == 0) { 1817 return false; 1818 } 1819 1820 // Check if the host is a valid IPv6reference. 1821 if (address.startsWith("[")) { 1822 return isWellFormedIPv6Reference(address); 1823 } 1824 1825 // Cannot start with a '.', '-', or end with a '-'. 
1826 if (address.startsWith(".") || 1827 address.startsWith("-") || 1828 address.endsWith("-")) { 1829 return false; 1830 } 1831 1832 // rightmost domain label starting with digit indicates IP address 1833 // since top level domain label can only start with an alpha 1834 // see RFC 2396 Section 3.2.2 1835 int index = address.lastIndexOf('.'); 1836 if (address.endsWith(".")) { 1837 index = address.substring(0, index).lastIndexOf('.'); 1838 } 1839 1840 if (index+1 < addrLength && isDigit(address.charAt(index+1))) { 1841 return isWellFormedIPv4Address(address); 1842 } 1843 else { 1844 // hostname = *( domainlabel "." ) toplabel [ "." ] 1845 // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum 1846 // toplabel = alpha | alpha *( alphanum | "-" ) alphanum 1847 1848 // RFC 2396 states that hostnames take the form described in 1849 // RFC 1034 (Section 3) and RFC 1123 (Section 2.1). According 1850 // to RFC 1034, hostnames are limited to 255 characters. 1851 if (addrLength > 255) { 1852 return false; 1853 } 1854 1855 // domain labels can contain alphanumerics and '-" 1856 // but must start and end with an alphanumeric 1857 char testChar; 1858 int labelCharCount = 0; 1859 1860 for (int i = 0; i < addrLength; i++) { 1861 testChar = address.charAt(i); 1862 if (testChar == '.') { 1863 if (!isAlphanum(address.charAt(i-1))) { 1864 return false; 1865 } 1866 if (i+1 < addrLength && !isAlphanum(address.charAt(i+1))) { 1867 return false; 1868 } 1869 labelCharCount = 0; 1870 } 1871 else if (!isAlphanum(testChar) && testChar != '-') { 1872 return false; 1873 } 1874 // RFC 1034: Labels must be 63 characters or less. 1875 else if (++labelCharCount > 63) { 1876 return false; 1877 } 1878 } 1879 } 1880 return true; 1881 } 1882 1883 /** 1884 * <p>Determines whether a string is an IPv4 address as defined by 1885 * RFC 2373, and under the further constraint that it must be a 32-bit 1886 * address. 
Though not expressed in the grammar, in order to satisfy 1887 * the 32-bit address constraint, each segment of the address cannot 1888 * be greater than 255 (8 bits of information).</p> 1889 * 1890 * <p><code>IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT</code></p> 1891 * 1892 * @return true if the string is a syntactically valid IPv4 address 1893 */ 1894 public static boolean isWellFormedIPv4Address(String address) { 1895 1896 int addrLength = address.length(); 1897 char testChar; 1898 int numDots = 0; 1899 int numDigits = 0; 1900 1901 // make sure that 1) we see only digits and dot separators, 2) that 1902 // any dot separator is preceded and followed by a digit and 1903 // 3) that we find 3 dots 1904 // 1905 // RFC 2732 amended RFC 2396 by replacing the definition 1906 // of IPv4address with the one defined by RFC 2373. - mrglavas 1907 // 1908 // IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT 1909 // 1910 // One to three digits must be in each segment. 1911 for (int i = 0; i < addrLength; i++) { 1912 testChar = address.charAt(i); 1913 if (testChar == '.') { 1914 if ((i > 0 && !isDigit(address.charAt(i-1))) || 1915 (i+1 < addrLength && !isDigit(address.charAt(i+1)))) { 1916 return false; 1917 } 1918 numDigits = 0; 1919 if (++numDots > 3) { 1920 return false; 1921 } 1922 } 1923 else if (!isDigit(testChar)) { 1924 return false; 1925 } 1926 // Check that that there are no more than three digits 1927 // in this segment. 1928 else if (++numDigits > 3) { 1929 return false; 1930 } 1931 // Check that this segment is not greater than 255. 
1932 else if (numDigits == 3) { 1933 char first = address.charAt(i-2); 1934 char second = address.charAt(i-1); 1935 if (!(first < '2' || 1936 (first == '2' && 1937 (second < '5' || 1938 (second == '5' && testChar <= '5'))))) { 1939 return false; 1940 } 1941 } 1942 } 1943 return (numDots == 3); 1944 } 1945 1946 /** 1947 * <p>Determines whether a string is an IPv6 reference as defined 1948 * by RFC 2732, where IPv6address is defined in RFC 2373. The 1949 * IPv6 address is parsed according to Section 2.2 of RFC 2373, 1950 * with the additional constraint that the address be composed of 1951 * 128 bits of information.</p> 1952 * 1953 * <p><code>IPv6reference = "[" IPv6address "]"</code></p> 1954 * 1955 * <p>Note: The BNF expressed in RFC 2373 Appendix B does not 1956 * accurately describe section 2.2, and was in fact removed from 1957 * RFC 3513, the successor of RFC 2373.</p> 1958 * 1959 * @return true if the string is a syntactically valid IPv6 reference 1960 */ 1961 public static boolean isWellFormedIPv6Reference(String address) { 1962 1963 int addrLength = address.length(); 1964 int index = 1; 1965 int end = addrLength-1; 1966 1967 // Check if string is a potential match for IPv6reference. 1968 if (!(addrLength > 2 && address.charAt(0) == '[' 1969 && address.charAt(end) == ']')) { 1970 return false; 1971 } 1972 1973 // Counter for the number of 16-bit sections read in the address. 1974 int [] counter = new int[1]; 1975 1976 // Scan hex sequence before possible '::' or IPv4 address. 1977 index = scanHexSequence(address, index, end, counter); 1978 if (index == -1) { 1979 return false; 1980 } 1981 // Address must contain 128-bits of information. 1982 else if (index == end) { 1983 return (counter[0] == 8); 1984 } 1985 1986 if (index+1 < end && address.charAt(index) == ':') { 1987 if (address.charAt(index+1) == ':') { 1988 // '::' represents at least one 16-bit group of zeros. 
1989 if (++counter[0] > 8) { 1990 return false; 1991 } 1992 index += 2; 1993 // Trailing zeros will fill out the rest of the address. 1994 if (index == end) { 1995 return true; 1996 } 1997 } 1998 // If the second character wasn't ':', in order to be valid, 1999 // the remainder of the string must match IPv4Address, 2000 // and we must have read exactly 6 16-bit groups. 2001 else { 2002 return (counter[0] == 6) && 2003 isWellFormedIPv4Address(address.substring(index+1, end)); 2004 } 2005 } 2006 else { 2007 return false; 2008 } 2009 2010 // 3. Scan hex sequence after '::'. 2011 int prevCount = counter[0]; 2012 index = scanHexSequence(address, index, end, counter); 2013 2014 // We've either reached the end of the string, the address ends in 2015 // an IPv4 address, or it is invalid. scanHexSequence has already 2016 // made sure that we have the right number of bits. 2017 return (index == end) || 2018 (index != -1 && isWellFormedIPv4Address( 2019 address.substring((counter[0] > prevCount) ? index+1 : index, end))); 2020 } 2021 2022 /** 2023 * Helper method for isWellFormedIPv6Reference which scans the 2024 * hex sequences of an IPv6 address. It returns the index of the 2025 * next character to scan in the address, or -1 if the string 2026 * cannot match a valid IPv6 address. 
2027 * 2028 * @param address the string to be scanned 2029 * @param index the beginning index (inclusive) 2030 * @param end the ending index (exclusive) 2031 * @param counter a counter for the number of 16-bit sections read 2032 * in the address 2033 * 2034 * @return the index of the next character to scan, or -1 if the 2035 * string cannot match a valid IPv6 address 2036 */ 2037 private static int scanHexSequence (String address, int index, int end, int [] counter) { 2038 2039 char testChar; 2040 int numDigits = 0; 2041 int start = index; 2042 2043 // Trying to match the following productions: 2044 // hexseq = hex4 *( ":" hex4) 2045 // hex4 = 1*4HEXDIG 2046 for (; index < end; ++index) { 2047 testChar = address.charAt(index); 2048 if (testChar == ':') { 2049 // IPv6 addresses are 128-bit, so there can be at most eight sections. 2050 if (numDigits > 0 && ++counter[0] > 8) { 2051 return -1; 2052 } 2053 // This could be '::'. 2054 if (numDigits == 0 || ((index+1 < end) && address.charAt(index+1) == ':')) { 2055 return index; 2056 } 2057 numDigits = 0; 2058 } 2059 // This might be invalid or an IPv4address. If it's potentially an IPv4address, 2060 // backup to just after the last valid character that matches hexseq. 2061 else if (!isHex(testChar)) { 2062 if (testChar == '.' && numDigits < 4 && numDigits > 0 && counter[0] <= 6) { 2063 int back = index - numDigits - 1; 2064 return (back >= start) ? back : (back+1); 2065 } 2066 return -1; 2067 } 2068 // There can be at most 4 hex digits per group. 2069 else if (++numDigits > 4) { 2070 return -1; 2071 } 2072 } 2073 return (numDigits > 0 && ++counter[0] <= 8) ? end : -1; 2074 } 2075 2076 2077 /** 2078 * Determine whether a char is a digit. 2079 * 2080 * @return true if the char is betweeen '0' and '9', false otherwise 2081 */ 2082 private static boolean isDigit(char p_char) { 2083 return p_char >= '0' && p_char <= '9'; 2084 } 2085 2086 /** 2087 * Determine whether a character is a hexadecimal character. 
2088 * 2089 * @return true if the char is betweeen '0' and '9', 'a' and 'f' 2090 * or 'A' and 'F', false otherwise 2091 */ 2092 private static boolean isHex(char p_char) { 2093 return (p_char <= 'f' && (fgLookupTable[p_char] & ASCII_HEX_CHARACTERS) != 0); 2094 } 2095 2096 /** 2097 * Determine whether a char is an alphabetic character: a-z or A-Z 2098 * 2099 * @return true if the char is alphabetic, false otherwise 2100 */ 2101 private static boolean isAlpha(char p_char) { 2102 return ((p_char >= 'a' && p_char <= 'z') || (p_char >= 'A' && p_char <= 'Z' )); 2103 } 2104 2105 /** 2106 * Determine whether a char is an alphanumeric: 0-9, a-z or A-Z 2107 * 2108 * @return true if the char is alphanumeric, false otherwise 2109 */ 2110 private static boolean isAlphanum(char p_char) { 2111 return (p_char <= 'z' && (fgLookupTable[p_char] & MASK_ALPHA_NUMERIC) != 0); 2112 } 2113 2114 /** 2115 * Determine whether a character is a reserved character: 2116 * ';', '/', '?', ':', '@', '&', '=', '+', '$', ',', '[', or ']' 2117 * 2118 * @return true if the string contains any reserved characters 2119 */ 2120 private static boolean isReservedCharacter(char p_char) { 2121 return (p_char <= ']' && (fgLookupTable[p_char] & RESERVED_CHARACTERS) != 0); 2122 } 2123 2124 /** 2125 * Determine whether a char is an unreserved character. 2126 * 2127 * @return true if the char is unreserved, false otherwise 2128 */ 2129 private static boolean isUnreservedCharacter(char p_char) { 2130 return (p_char <= '~' && (fgLookupTable[p_char] & MASK_UNRESERVED_MASK) != 0); 2131 } 2132 2133 /** 2134 * Determine whether a char is a URI character (reserved or 2135 * unreserved, not including '%' for escaped octets). 2136 * 2137 * @return true if the char is a URI character, false otherwise 2138 */ 2139 private static boolean isURICharacter (char p_char) { 2140 return (p_char <= '~' && (fgLookupTable[p_char] & MASK_URI_CHARACTER) != 0); 2141 } 2142 2143 /** 2144 * Determine whether a char is a scheme character. 
2145 * 2146 * @return true if the char is a scheme character, false otherwise 2147 */ 2148 private static boolean isSchemeCharacter (char p_char) { 2149 return (p_char <= 'z' && (fgLookupTable[p_char] & MASK_SCHEME_CHARACTER) != 0); 2150 } 2151 2152 /** 2153 * Determine whether a char is a userinfo character. 2154 * 2155 * @return true if the char is a userinfo character, false otherwise 2156 */ 2157 private static boolean isUserinfoCharacter (char p_char) { 2158 return (p_char <= 'z' && (fgLookupTable[p_char] & MASK_USERINFO_CHARACTER) != 0); 2159 } 2160 2161 /** 2162 * Determine whether a char is a path character. 2163 * 2164 * @return true if the char is a path character, false otherwise 2165 */ 2166 private static boolean isPathCharacter (char p_char) { 2167 return (p_char <= '~' && (fgLookupTable[p_char] & MASK_PATH_CHARACTER) != 0); 2168 } 2169 2170 2171 /** 2172 * Determine whether a given string contains only URI characters (also 2173 * called "uric" in RFC 2396). uric consist of all reserved 2174 * characters, unreserved characters and escaped characters. 2175 * 2176 * @return true if the string is comprised of uric, false otherwise 2177 */ 2178 private static boolean isURIString(String p_uric) { 2179 if (p_uric == null) { 2180 return false; 2181 } 2182 int end = p_uric.length(); 2183 char testChar = '\0'; 2184 for (int i = 0; i < end; i++) { 2185 testChar = p_uric.charAt(i); 2186 if (testChar == '%') { 2187 if (i+2 >= end || 2188 !isHex(p_uric.charAt(i+1)) || 2189 !isHex(p_uric.charAt(i+2))) { 2190 return false; 2191 } 2192 else { 2193 i += 2; 2194 continue; 2195 } 2196 } 2197 if (isURICharacter(testChar)) { 2198 continue; 2199 } 2200 else { 2201 return false; 2202 } 2203 } 2204 return true; 2205 } 2206 }