1 /* 2 * reserved comment block 3 * DO NOT REMOVE OR ALTER! 4 */ 5 /* 6 * Copyright 1999-2005 The Apache Software Foundation. 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xerces.internal.util; 22 23 import java.io.IOException; 24 import java.io.Serializable; 25 26 /********************************************************************** 27 * A class to represent a Uniform Resource Identifier (URI). This class 28 * is designed to handle the parsing of URIs and provide access to 29 * the various components (scheme, host, port, userinfo, path, query 30 * string and fragment) that may constitute a URI. 31 * <p> 32 * Parsing of a URI specification is done according to the URI 33 * syntax described in 34 * <a href="http://www.ietf.org/rfc/rfc2396.txt?number=2396">RFC 2396</a>, 35 * and amended by 36 * <a href="http://www.ietf.org/rfc/rfc2732.txt?number=2732">RFC 2732</a>. 37 * <p> 38 * Every absolute URI consists of a scheme, followed by a colon (':'), 39 * followed by a scheme-specific part. For URIs that follow the 40 * "generic URI" syntax, the scheme-specific part begins with two 41 * slashes ("//") and may be followed by an authority segment (comprised 42 * of user information, host, and port), path segment, query segment 43 * and fragment. Note that RFC 2396 no longer specifies the use of the 44 * parameters segment and excludes the "user:password" syntax as part of 45 * the authority segment. If "user:password" appears in a URI, the entire 46 * user/password string is stored as userinfo. 47 * <p> 48 * For URIs that do not follow the "generic URI" syntax (e.g. mailto), 49 * the entire scheme-specific part is treated as the "path" portion 50 * of the URI. 51 * <p> 52 * Note that, unlike the java.net.URL class, this class does not provide 53 * any built-in network access functionality nor does it provide any 54 * scheme-specific functionality (for example, it does not know a 55 * default port for a specific scheme). Rather, it only knows the 56 * grammar and basic set of operations that can be applied to a URI. 57 * 58 * 59 **********************************************************************/ 60 public class URI implements Serializable { 61 62 /******************************************************************* 63 * MalformedURIExceptions are thrown in the process of building a URI 64 * or setting fields on a URI when an operation would result in an 65 * invalid URI specification. 66 * 67 ********************************************************************/ 68 public static class MalformedURIException extends IOException { 69 70 /** Serialization version. */ 71 static final long serialVersionUID = -6695054834342951930L; 72 73 /****************************************************************** 74 * Constructs a <code>MalformedURIException</code> with no specified 75 * detail message. 76 ******************************************************************/ 77 public MalformedURIException() { 78 super(); 79 } 80 81 /***************************************************************** 82 * Constructs a <code>MalformedURIException</code> with the 83 * specified detail message. 84 * 85 * @param p_msg the detail message. 86 ******************************************************************/ 87 public MalformedURIException(String p_msg) { 88 super(p_msg); 89 } 90 } 91 92 /** Serialization version. */ 93 static final long serialVersionUID = 1601921774685357214L; 94 95 private static final byte [] fgLookupTable = new byte[128]; 96 97 /** 98 * Character Classes 99 */ 100 101 /** reserved characters ;/?:@&=+$,[] */ 102 //RFC 2732 added '[' and ']' as reserved characters 103 private static final int RESERVED_CHARACTERS = 0x01; 104 105 /** URI punctuation mark characters: -_.!~*'() - these, combined with 106 alphanumerics, constitute the "unreserved" characters */ 107 private static final int MARK_CHARACTERS = 0x02; 108 109 /** scheme can be composed of alphanumerics and these characters: +-. */ 110 private static final int SCHEME_CHARACTERS = 0x04; 111 112 /** userinfo can be composed of unreserved, escaped and these 113 characters: ;:&=+$, */ 114 private static final int USERINFO_CHARACTERS = 0x08; 115 116 /** ASCII letter characters */ 117 private static final int ASCII_ALPHA_CHARACTERS = 0x10; 118 119 /** ASCII digit characters */ 120 private static final int ASCII_DIGIT_CHARACTERS = 0x20; 121 122 /** ASCII hex characters */ 123 private static final int ASCII_HEX_CHARACTERS = 0x40; 124 125 /** Path characters */ 126 private static final int PATH_CHARACTERS = 0x80; 127 128 /** Mask for alpha-numeric characters */ 129 private static final int MASK_ALPHA_NUMERIC = ASCII_ALPHA_CHARACTERS | ASCII_DIGIT_CHARACTERS; 130 131 /** Mask for unreserved characters */ 132 private static final int MASK_UNRESERVED_MASK = MASK_ALPHA_NUMERIC | MARK_CHARACTERS; 133 134 /** Mask for URI allowable characters except for % */ 135 private static final int MASK_URI_CHARACTER = MASK_UNRESERVED_MASK | RESERVED_CHARACTERS; 136 137 /** Mask for scheme characters */ 138 private static final int MASK_SCHEME_CHARACTER = MASK_ALPHA_NUMERIC | SCHEME_CHARACTERS; 139 140 /** Mask for userinfo characters */ 141 private static final int MASK_USERINFO_CHARACTER = MASK_UNRESERVED_MASK | USERINFO_CHARACTERS; 142 143 /** Mask for path characters */ 144 private static final int MASK_PATH_CHARACTER = MASK_UNRESERVED_MASK | PATH_CHARACTERS; 145 146 static { 147 // Add ASCII Digits and ASCII Hex Numbers 148 for (int i = '0'; i <= '9'; ++i) { 149 fgLookupTable[i] |= ASCII_DIGIT_CHARACTERS | ASCII_HEX_CHARACTERS; 150 } 151 152 // Add ASCII Letters and ASCII Hex Numbers 153 for (int i = 'A'; i <= 'F'; ++i) { 154 fgLookupTable[i] |= ASCII_ALPHA_CHARACTERS | ASCII_HEX_CHARACTERS; 155 fgLookupTable[i+0x00000020] |= ASCII_ALPHA_CHARACTERS | ASCII_HEX_CHARACTERS; 156 } 157 158 // Add ASCII Letters 159 for (int i = 'G'; i <= 'Z'; ++i) { 160 fgLookupTable[i] |= ASCII_ALPHA_CHARACTERS; 161 fgLookupTable[i+0x00000020] |= ASCII_ALPHA_CHARACTERS; 162 } 163 164 // Add Reserved Characters 165 fgLookupTable[';'] |= RESERVED_CHARACTERS; 166 fgLookupTable['/'] |= RESERVED_CHARACTERS; 167 fgLookupTable['?'] |= RESERVED_CHARACTERS; 168 fgLookupTable[':'] |= RESERVED_CHARACTERS; 169 fgLookupTable['@'] |= RESERVED_CHARACTERS; 170 fgLookupTable['&'] |= RESERVED_CHARACTERS; 171 fgLookupTable['='] |= RESERVED_CHARACTERS; 172 fgLookupTable['+'] |= RESERVED_CHARACTERS; 173 fgLookupTable['$'] |= RESERVED_CHARACTERS; 174 fgLookupTable[','] |= RESERVED_CHARACTERS; 175 fgLookupTable['['] |= RESERVED_CHARACTERS; 176 fgLookupTable[']'] |= RESERVED_CHARACTERS; 177 178 // Add Mark Characters 179 fgLookupTable['-'] |= MARK_CHARACTERS; 180 fgLookupTable['_'] |= MARK_CHARACTERS; 181 fgLookupTable['.'] |= MARK_CHARACTERS; 182 fgLookupTable['!'] |= MARK_CHARACTERS; 183 fgLookupTable['~'] |= MARK_CHARACTERS; 184 fgLookupTable['*'] |= MARK_CHARACTERS; 185 fgLookupTable['\''] |= MARK_CHARACTERS; 186 fgLookupTable['('] |= MARK_CHARACTERS; 187 fgLookupTable[')'] |= MARK_CHARACTERS; 188 189 // Add Scheme Characters 190 fgLookupTable['+'] |= SCHEME_CHARACTERS; 191 fgLookupTable['-'] |= SCHEME_CHARACTERS; 192 fgLookupTable['.'] |= SCHEME_CHARACTERS; 193 194 // Add Userinfo Characters 195 fgLookupTable[';'] |= USERINFO_CHARACTERS; 196 fgLookupTable[':'] |= USERINFO_CHARACTERS; 197 fgLookupTable['&'] |= USERINFO_CHARACTERS; 198 fgLookupTable['='] |= USERINFO_CHARACTERS; 199 fgLookupTable['+'] |= USERINFO_CHARACTERS; 200 fgLookupTable['$'] |= USERINFO_CHARACTERS; 201 fgLookupTable[','] |= USERINFO_CHARACTERS; 202 203 // Add Path Characters 204 fgLookupTable[';'] |= PATH_CHARACTERS; 205 fgLookupTable['/'] |= PATH_CHARACTERS; 206 fgLookupTable[':'] |= PATH_CHARACTERS; 207 fgLookupTable['@'] |= PATH_CHARACTERS; 208 fgLookupTable['&'] |= PATH_CHARACTERS; 209 fgLookupTable['='] |= PATH_CHARACTERS; 210 fgLookupTable['+'] |= PATH_CHARACTERS; 211 fgLookupTable['$'] |= PATH_CHARACTERS; 212 fgLookupTable[','] |= PATH_CHARACTERS; 213 } 214 215 /** Stores the scheme (usually the protocol) for this URI. */ 216 private String m_scheme = null; 217 218 /** If specified, stores the userinfo for this URI; otherwise null */ 219 private String m_userinfo = null; 220 221 /** If specified, stores the host for this URI; otherwise null */ 222 private String m_host = null; 223 224 /** If specified, stores the port for this URI; otherwise -1 */ 225 private int m_port = -1; 226 227 /** If specified, stores the registry based authority for this URI; otherwise -1 */ 228 private String m_regAuthority = null; 229 230 /** If specified, stores the path for this URI; otherwise null */ 231 private String m_path = null; 232 233 /** If specified, stores the query string for this URI; otherwise 234 null. */ 235 private String m_queryString = null; 236 237 /** If specified, stores the fragment for this URI; otherwise null */ 238 private String m_fragment = null; 239 240 private static boolean DEBUG = false; 241 242 /** 243 * Construct a new and uninitialized URI. 244 */ 245 public URI() { 246 } 247 248 /** 249 * Construct a new URI from another URI. All fields for this URI are 250 * set equal to the fields of the URI passed in. 251 * 252 * @param p_other the URI to copy (cannot be null) 253 */ 254 public URI(URI p_other) { 255 initialize(p_other); 256 } 257 258 /** 259 * Construct a new URI from a URI specification string. If the 260 * specification follows the "generic URI" syntax, (two slashes 261 * following the first colon), the specification will be parsed 262 * accordingly - setting the scheme, userinfo, host,port, path, query 263 * string and fragment fields as necessary. If the specification does 264 * not follow the "generic URI" syntax, the specification is parsed 265 * into a scheme and scheme-specific part (stored as the path) only. 266 * 267 * @param p_uriSpec the URI specification string (cannot be null or 268 * empty) 269 * 270 * @exception MalformedURIException if p_uriSpec violates any syntax 271 * rules 272 */ 273 public URI(String p_uriSpec) throws MalformedURIException { 274 this((URI)null, p_uriSpec); 275 } 276 277 /** 278 * Construct a new URI from a URI specification string. If the 279 * specification follows the "generic URI" syntax, (two slashes 280 * following the first colon), the specification will be parsed 281 * accordingly - setting the scheme, userinfo, host,port, path, query 282 * string and fragment fields as necessary. If the specification does 283 * not follow the "generic URI" syntax, the specification is parsed 284 * into a scheme and scheme-specific part (stored as the path) only. 285 * Construct a relative URI if boolean is assigned to "true" 286 * and p_uriSpec is not valid absolute URI, instead of throwing an exception. 287 * 288 * @param p_uriSpec the URI specification string (cannot be null or 289 * empty) 290 * @param allowNonAbsoluteURI true to permit non-absolute URIs, 291 * false otherwise. 292 * 293 * @exception MalformedURIException if p_uriSpec violates any syntax 294 * rules 295 */ 296 public URI(String p_uriSpec, boolean allowNonAbsoluteURI) throws MalformedURIException { 297 this((URI)null, p_uriSpec, allowNonAbsoluteURI); 298 } 299 300 /** 301 * Construct a new URI from a base URI and a URI specification string. 302 * The URI specification string may be a relative URI. 303 * 304 * @param p_base the base URI (cannot be null if p_uriSpec is null or 305 * empty) 306 * @param p_uriSpec the URI specification string (cannot be null or 307 * empty if p_base is null) 308 * 309 * @exception MalformedURIException if p_uriSpec violates any syntax 310 * rules 311 */ 312 public URI(URI p_base, String p_uriSpec) throws MalformedURIException { 313 initialize(p_base, p_uriSpec); 314 } 315 316 /** 317 * Construct a new URI from a base URI and a URI specification string. 318 * The URI specification string may be a relative URI. 319 * Construct a relative URI if boolean is assigned to "true" 320 * and p_uriSpec is not valid absolute URI and p_base is null 321 * instead of throwing an exception. 322 * 323 * @param p_base the base URI (cannot be null if p_uriSpec is null or 324 * empty) 325 * @param p_uriSpec the URI specification string (cannot be null or 326 * empty if p_base is null) 327 * @param allowNonAbsoluteURI true to permit non-absolute URIs, 328 * false otherwise. 329 * 330 * @exception MalformedURIException if p_uriSpec violates any syntax 331 * rules 332 */ 333 public URI(URI p_base, String p_uriSpec, boolean allowNonAbsoluteURI) throws MalformedURIException { 334 initialize(p_base, p_uriSpec, allowNonAbsoluteURI); 335 } 336 337 /** 338 * Construct a new URI that does not follow the generic URI syntax. 339 * Only the scheme and scheme-specific part (stored as the path) are 340 * initialized. 341 * 342 * @param p_scheme the URI scheme (cannot be null or empty) 343 * @param p_schemeSpecificPart the scheme-specific part (cannot be 344 * null or empty) 345 * 346 * @exception MalformedURIException if p_scheme violates any 347 * syntax rules 348 */ 349 public URI(String p_scheme, String p_schemeSpecificPart) 350 throws MalformedURIException { 351 if (p_scheme == null || p_scheme.trim().length() == 0) { 352 throw new MalformedURIException( 353 "Cannot construct URI with null/empty scheme!"); 354 } 355 if (p_schemeSpecificPart == null || 356 p_schemeSpecificPart.trim().length() == 0) { 357 throw new MalformedURIException( 358 "Cannot construct URI with null/empty scheme-specific part!"); 359 } 360 setScheme(p_scheme); 361 setPath(p_schemeSpecificPart); 362 } 363 364 /** 365 * Construct a new URI that follows the generic URI syntax from its 366 * component parts. Each component is validated for syntax and some 367 * basic semantic checks are performed as well. See the individual 368 * setter methods for specifics. 369 * 370 * @param p_scheme the URI scheme (cannot be null or empty) 371 * @param p_host the hostname, IPv4 address or IPv6 reference for the URI 372 * @param p_path the URI path - if the path contains '?' or '#', 373 * then the query string and/or fragment will be 374 * set from the path; however, if the query and 375 * fragment are specified both in the path and as 376 * separate parameters, an exception is thrown 377 * @param p_queryString the URI query string (cannot be specified 378 * if path is null) 379 * @param p_fragment the URI fragment (cannot be specified if path 380 * is null) 381 * 382 * @exception MalformedURIException if any of the parameters violates 383 * syntax rules or semantic rules 384 */ 385 public URI(String p_scheme, String p_host, String p_path, 386 String p_queryString, String p_fragment) 387 throws MalformedURIException { 388 this(p_scheme, null, p_host, -1, p_path, p_queryString, p_fragment); 389 } 390 391 /** 392 * Construct a new URI that follows the generic URI syntax from its 393 * component parts. Each component is validated for syntax and some 394 * basic semantic checks are performed as well. See the individual 395 * setter methods for specifics. 396 * 397 * @param p_scheme the URI scheme (cannot be null or empty) 398 * @param p_userinfo the URI userinfo (cannot be specified if host 399 * is null) 400 * @param p_host the hostname, IPv4 address or IPv6 reference for the URI 401 * @param p_port the URI port (may be -1 for "unspecified"; cannot 402 * be specified if host is null) 403 * @param p_path the URI path - if the path contains '?' or '#', 404 * then the query string and/or fragment will be 405 * set from the path; however, if the query and 406 * fragment are specified both in the path and as 407 * separate parameters, an exception is thrown 408 * @param p_queryString the URI query string (cannot be specified 409 * if path is null) 410 * @param p_fragment the URI fragment (cannot be specified if path 411 * is null) 412 * 413 * @exception MalformedURIException if any of the parameters violates 414 * syntax rules or semantic rules 415 */ 416 public URI(String p_scheme, String p_userinfo, 417 String p_host, int p_port, String p_path, 418 String p_queryString, String p_fragment) 419 throws MalformedURIException { 420 if (p_scheme == null || p_scheme.trim().length() == 0) { 421 throw new MalformedURIException("Scheme is required!"); 422 } 423 424 if (p_host == null) { 425 if (p_userinfo != null) { 426 throw new MalformedURIException( 427 "Userinfo may not be specified if host is not specified!"); 428 } 429 if (p_port != -1) { 430 throw new MalformedURIException( 431 "Port may not be specified if host is not specified!"); 432 } 433 } 434 435 if (p_path != null) { 436 if (p_path.indexOf('?') != -1 && p_queryString != null) { 437 throw new MalformedURIException( 438 "Query string cannot be specified in path and query string!"); 439 } 440 441 if (p_path.indexOf('#') != -1 && p_fragment != null) { 442 throw new MalformedURIException( 443 "Fragment cannot be specified in both the path and fragment!"); 444 } 445 } 446 447 setScheme(p_scheme); 448 setHost(p_host); 449 setPort(p_port); 450 setUserinfo(p_userinfo); 451 setPath(p_path); 452 setQueryString(p_queryString); 453 setFragment(p_fragment); 454 } 455 456 /** 457 * Initialize all fields of this URI from another URI. 458 * 459 * @param p_other the URI to copy (cannot be null) 460 */ 461 private void initialize(URI p_other) { 462 m_scheme = p_other.getScheme(); 463 m_userinfo = p_other.getUserinfo(); 464 m_host = p_other.getHost(); 465 m_port = p_other.getPort(); 466 m_regAuthority = p_other.getRegBasedAuthority(); 467 m_path = p_other.getPath(); 468 m_queryString = p_other.getQueryString(); 469 m_fragment = p_other.getFragment(); 470 } 471 472 /** 473 * Initializes this URI from a base URI and a URI specification string. 474 * See RFC 2396 Section 4 and Appendix B for specifications on parsing 475 * the URI and Section 5 for specifications on resolving relative URIs 476 * and relative paths. 477 * 478 * @param p_base the base URI (may be null if p_uriSpec is an absolute 479 * URI) 480 * @param p_uriSpec the URI spec string which may be an absolute or 481 * relative URI (can only be null/empty if p_base 482 * is not null) 483 * @param allowNonAbsoluteURI true to permit non-absolute URIs, 484 * in case of relative URI, false otherwise. 485 * 486 * @exception MalformedURIException if p_base is null and p_uriSpec 487 * is not an absolute URI or if 488 * p_uriSpec violates syntax rules 489 */ 490 private void initialize(URI p_base, String p_uriSpec, boolean allowNonAbsoluteURI) 491 throws MalformedURIException { 492 493 String uriSpec = p_uriSpec; 494 int uriSpecLen = (uriSpec != null) ? uriSpec.length() : 0; 495 496 if (p_base == null && uriSpecLen == 0) { 497 if (allowNonAbsoluteURI) { 498 m_path = ""; 499 return; 500 } 501 throw new MalformedURIException("Cannot initialize URI with empty parameters."); 502 } 503 504 // just make a copy of the base if spec is empty 505 if (uriSpecLen == 0) { 506 initialize(p_base); 507 return; 508 } 509 510 int index = 0; 511 512 // Check for scheme, which must be before '/', '?' or '#'. 513 int colonIdx = uriSpec.indexOf(':'); 514 if (colonIdx != -1) { 515 final int searchFrom = colonIdx - 1; 516 // search backwards starting from character before ':'. 517 int slashIdx = uriSpec.lastIndexOf('/', searchFrom); 518 int queryIdx = uriSpec.lastIndexOf('?', searchFrom); 519 int fragmentIdx = uriSpec.lastIndexOf('#', searchFrom); 520 521 if (colonIdx == 0 || slashIdx != -1 || 522 queryIdx != -1 || fragmentIdx != -1) { 523 // A standalone base is a valid URI according to spec 524 if (colonIdx == 0 || (p_base == null && fragmentIdx != 0 && !allowNonAbsoluteURI)) { 525 throw new MalformedURIException("No scheme found in URI."); 526 } 527 } 528 else { 529 initializeScheme(uriSpec); 530 index = m_scheme.length()+1; 531 532 // Neither 'scheme:' or 'scheme:#fragment' are valid URIs. 533 if (colonIdx == uriSpecLen - 1 || uriSpec.charAt(colonIdx+1) == '#') { 534 throw new MalformedURIException("Scheme specific part cannot be empty."); 535 } 536 } 537 } 538 else if (p_base == null && uriSpec.indexOf('#') != 0 && !allowNonAbsoluteURI) { 539 throw new MalformedURIException("No scheme found in URI."); 540 } 541 542 // Two slashes means we may have authority, but definitely means we're either 543 // matching net_path or abs_path. These two productions are ambiguous in that 544 // every net_path (except those containing an IPv6Reference) is an abs_path. 545 // RFC 2396 resolves this ambiguity by applying a greedy left most matching rule. 546 // Try matching net_path first, and if that fails we don't have authority so 547 // then attempt to match abs_path. 548 // 549 // net_path = "//" authority [ abs_path ] 550 // abs_path = "/" path_segments 551 if (((index+1) < uriSpecLen) && 552 (uriSpec.charAt(index) == '/' && uriSpec.charAt(index+1) == '/')) { 553 index += 2; 554 int startPos = index; 555 556 // Authority will be everything up to path, query or fragment 557 char testChar = '\0'; 558 while (index < uriSpecLen) { 559 testChar = uriSpec.charAt(index); 560 if (testChar == '/' || testChar == '?' || testChar == '#') { 561 break; 562 } 563 index++; 564 } 565 566 // Attempt to parse authority. If the section is an empty string 567 // this is a valid server based authority, so set the host to this 568 // value. 569 if (index > startPos) { 570 // If we didn't find authority we need to back up. Attempt to 571 // match against abs_path next. 572 if (!initializeAuthority(uriSpec.substring(startPos, index))) { 573 index = startPos - 2; 574 } 575 } 576 else { 577 m_host = ""; 578 } 579 } 580 581 initializePath(uriSpec, index); 582 583 // Resolve relative URI to base URI - see RFC 2396 Section 5.2 584 // In some cases, it might make more sense to throw an exception 585 // (when scheme is specified is the string spec and the base URI 586 // is also specified, for example), but we're just following the 587 // RFC specifications 588 if (p_base != null) { 589 absolutize(p_base); 590 } 591 } 592 593 /** 594 * Initializes this URI from a base URI and a URI specification string. 595 * See RFC 2396 Section 4 and Appendix B for specifications on parsing 596 * the URI and Section 5 for specifications on resolving relative URIs 597 * and relative paths. 598 * 599 * @param p_base the base URI (may be null if p_uriSpec is an absolute 600 * URI) 601 * @param p_uriSpec the URI spec string which may be an absolute or 602 * relative URI (can only be null/empty if p_base 603 * is not null) 604 * 605 * @exception MalformedURIException if p_base is null and p_uriSpec 606 * is not an absolute URI or if 607 * p_uriSpec violates syntax rules 608 */ 609 private void initialize(URI p_base, String p_uriSpec) 610 throws MalformedURIException { 611 612 String uriSpec = p_uriSpec; 613 int uriSpecLen = (uriSpec != null) ? uriSpec.length() : 0; 614 615 if (p_base == null && uriSpecLen == 0) { 616 throw new MalformedURIException( 617 "Cannot initialize URI with empty parameters."); 618 } 619 620 // just make a copy of the base if spec is empty 621 if (uriSpecLen == 0) { 622 initialize(p_base); 623 return; 624 } 625 626 int index = 0; 627 628 // Check for scheme, which must be before '/', '?' or '#'. 629 int colonIdx = uriSpec.indexOf(':'); 630 if (colonIdx != -1) { 631 final int searchFrom = colonIdx - 1; 632 // search backwards starting from character before ':'. 633 int slashIdx = uriSpec.lastIndexOf('/', searchFrom); 634 int queryIdx = uriSpec.lastIndexOf('?', searchFrom); 635 int fragmentIdx = uriSpec.lastIndexOf('#', searchFrom); 636 637 if (colonIdx == 0 || slashIdx != -1 || 638 queryIdx != -1 || fragmentIdx != -1) { 639 // A standalone base is a valid URI according to spec 640 if (colonIdx == 0 || (p_base == null && fragmentIdx != 0)) { 641 throw new MalformedURIException("No scheme found in URI."); 642 } 643 } 644 else { 645 initializeScheme(uriSpec); 646 index = m_scheme.length()+1; 647 648 // Neither 'scheme:' or 'scheme:#fragment' are valid URIs. 649 if (colonIdx == uriSpecLen - 1 || uriSpec.charAt(colonIdx+1) == '#') { 650 throw new MalformedURIException("Scheme specific part cannot be empty."); 651 } 652 } 653 } 654 else if (p_base == null && uriSpec.indexOf('#') != 0) { 655 throw new MalformedURIException("No scheme found in URI."); 656 } 657 658 // Two slashes means we may have authority, but definitely means we're either 659 // matching net_path or abs_path. These two productions are ambiguous in that 660 // every net_path (except those containing an IPv6Reference) is an abs_path. 661 // RFC 2396 resolves this ambiguity by applying a greedy left most matching rule. 662 // Try matching net_path first, and if that fails we don't have authority so 663 // then attempt to match abs_path. 664 // 665 // net_path = "//" authority [ abs_path ] 666 // abs_path = "/" path_segments 667 if (((index+1) < uriSpecLen) && 668 (uriSpec.charAt(index) == '/' && uriSpec.charAt(index+1) == '/')) { 669 index += 2; 670 int startPos = index; 671 672 // Authority will be everything up to path, query or fragment 673 char testChar = '\0'; 674 while (index < uriSpecLen) { 675 testChar = uriSpec.charAt(index); 676 if (testChar == '/' || testChar == '?' || testChar == '#') { 677 break; 678 } 679 index++; 680 } 681 682 // Attempt to parse authority. If the section is an empty string 683 // this is a valid server based authority, so set the host to this 684 // value. 685 if (index > startPos) { 686 // If we didn't find authority we need to back up. Attempt to 687 // match against abs_path next. 688 if (!initializeAuthority(uriSpec.substring(startPos, index))) { 689 index = startPos - 2; 690 } 691 } 692 else { 693 m_host = ""; 694 } 695 } 696 697 initializePath(uriSpec, index); 698 699 // Resolve relative URI to base URI - see RFC 2396 Section 5.2 700 // In some cases, it might make more sense to throw an exception 701 // (when scheme is specified is the string spec and the base URI 702 // is also specified, for example), but we're just following the 703 // RFC specifications 704 if (p_base != null) { 705 absolutize(p_base); 706 } 707 } 708 709 /** 710 * Absolutize URI with given base URI. 711 * 712 * @param p_base base URI for absolutization 713 */ 714 public void absolutize(URI p_base) { 715 716 // check to see if this is the current doc - RFC 2396 5.2 #2 717 // note that this is slightly different from the RFC spec in that 718 // we don't include the check for query string being null 719 // - this handles cases where the urispec is just a query 720 // string or a fragment (e.g. "?y" or "#s") - 721 // see <http://www.ics.uci.edu/~fielding/url/test1.html> which 722 // identified this as a bug in the RFC 723 if (m_path.length() == 0 && m_scheme == null && 724 m_host == null && m_regAuthority == null) { 725 m_scheme = p_base.getScheme(); 726 m_userinfo = p_base.getUserinfo(); 727 m_host = p_base.getHost(); 728 m_port = p_base.getPort(); 729 m_regAuthority = p_base.getRegBasedAuthority(); 730 m_path = p_base.getPath(); 731 732 if (m_queryString == null) { 733 m_queryString = p_base.getQueryString(); 734 735 if (m_fragment == null) { 736 m_fragment = p_base.getFragment(); 737 } 738 } 739 return; 740 } 741 742 // check for scheme - RFC 2396 5.2 #3 743 // if we found a scheme, it means absolute URI, so we're done 744 if (m_scheme == null) { 745 m_scheme = p_base.getScheme(); 746 } 747 else { 748 return; 749 } 750 751 // check for authority - RFC 2396 5.2 #4 752 // if we found a host, then we've got a network path, so we're done 753 if (m_host == null && m_regAuthority == null) { 754 m_userinfo = p_base.getUserinfo(); 755 m_host = p_base.getHost(); 756 m_port = p_base.getPort(); 757 m_regAuthority = p_base.getRegBasedAuthority(); 758 } 759 else { 760 return; 761 } 762 763 // check for absolute path - RFC 2396 5.2 #5 764 if (m_path.length() > 0 && 765 m_path.startsWith("/")) { 766 return; 767 } 768 769 // if we get to this point, we need to resolve relative path 770 // RFC 2396 5.2 #6 771 String path = ""; 772 String basePath = p_base.getPath(); 773 774 // 6a - get all but the last segment of the base URI path 775 if (basePath != null && basePath.length() > 0) { 776 int lastSlash = basePath.lastIndexOf('/'); 777 if (lastSlash != -1) { 778 path = basePath.substring(0, lastSlash+1); 779 } 780 } 781 else if (m_path.length() > 0) { 782 path = "/"; 783 } 784 785 // 6b - append the relative URI path 786 path = path.concat(m_path); 787 788 // 6c - remove all "./" where "." is a complete path segment 789 int index = -1; 790 while ((index = path.indexOf("/./")) != -1) { 791 path = path.substring(0, index+1).concat(path.substring(index+3)); 792 } 793 794 // 6d - remove "." if path ends with "." as a complete path segment 795 if (path.endsWith("/.")) { 796 path = path.substring(0, path.length()-1); 797 } 798 799 // 6e - remove all "<segment>/../" where "<segment>" is a complete 800 // path segment not equal to ".." 801 index = 1; 802 int segIndex = -1; 803 String tempString = null; 804 805 while ((index = path.indexOf("/../", index)) > 0) { 806 tempString = path.substring(0, path.indexOf("/../")); 807 segIndex = tempString.lastIndexOf('/'); 808 if (segIndex != -1) { 809 if (!tempString.substring(segIndex).equals("..")) { 810 path = path.substring(0, segIndex+1).concat(path.substring(index+4)); 811 index = segIndex; 812 } 813 else { 814 index += 4; 815 } 816 } 817 else { 818 index += 4; 819 } 820 } 821 822 // 6f - remove ending "<segment>/.." where "<segment>" is a 823 // complete path segment 824 if (path.endsWith("/..")) { 825 tempString = path.substring(0, path.length()-3); 826 segIndex = tempString.lastIndexOf('/'); 827 if (segIndex != -1) { 828 path = path.substring(0, segIndex+1); 829 } 830 } 831 m_path = path; 832 } 833 834 /** 835 * Initialize the scheme for this URI from a URI string spec. 836 * 837 * @param p_uriSpec the URI specification (cannot be null) 838 * 839 * @exception MalformedURIException if URI does not have a conformant 840 * scheme 841 */ 842 private void initializeScheme(String p_uriSpec) 843 throws MalformedURIException { 844 int uriSpecLen = p_uriSpec.length(); 845 int index = 0; 846 String scheme = null; 847 char testChar = '\0'; 848 849 while (index < uriSpecLen) { 850 testChar = p_uriSpec.charAt(index); 851 if (testChar == ':' || testChar == '/' || 852 testChar == '?' || testChar == '#') { 853 break; 854 } 855 index++; 856 } 857 scheme = p_uriSpec.substring(0, index); 858 859 if (scheme.length() == 0) { 860 throw new MalformedURIException("No scheme found in URI."); 861 } 862 else { 863 setScheme(scheme); 864 } 865 } 866 867 /** 868 * Initialize the authority (either server or registry based) 869 * for this URI from a URI string spec. 870 * 871 * @param p_uriSpec the URI specification (cannot be null) 872 * 873 * @return true if the given string matched server or registry 874 * based authority 875 */ 876 private boolean initializeAuthority(String p_uriSpec) { 877 878 int index = 0; 879 int start = 0; 880 int end = p_uriSpec.length(); 881 882 char testChar = '\0'; 883 String userinfo = null; 884 885 // userinfo is everything up to @ 886 if (p_uriSpec.indexOf('@', start) != -1) { 887 while (index < end) { 888 testChar = p_uriSpec.charAt(index); 889 if (testChar == '@') { 890 break; 891 } 892 index++; 893 } 894 userinfo = p_uriSpec.substring(start, index); 895 index++; 896 } 897 898 // host is everything up to last ':', or up to 899 // and including ']' if followed by ':'. 900 String host = null; 901 start = index; 902 boolean hasPort = false; 903 if (index < end) { 904 if (p_uriSpec.charAt(start) == '[') { 905 int bracketIndex = p_uriSpec.indexOf(']', start); 906 index = (bracketIndex != -1) ? bracketIndex : end; 907 if (index+1 < end && p_uriSpec.charAt(index+1) == ':') { 908 ++index; 909 hasPort = true; 910 } 911 else { 912 index = end; 913 } 914 } 915 else { 916 int colonIndex = p_uriSpec.lastIndexOf(':', end); 917 index = (colonIndex > start) ? colonIndex : end; 918 hasPort = (index != end); 919 } 920 } 921 host = p_uriSpec.substring(start, index); 922 int port = -1; 923 if (host.length() > 0) { 924 // port 925 if (hasPort) { 926 index++; 927 start = index; 928 while (index < end) { 929 index++; 930 } 931 String portStr = p_uriSpec.substring(start, index); 932 if (portStr.length() > 0) { 933 // REVISIT: Remove this code. 934 /** for (int i = 0; i < portStr.length(); i++) { 935 if (!isDigit(portStr.charAt(i))) { 936 throw new MalformedURIException( 937 portStr + 938 " is invalid. Port should only contain digits!"); 939 } 940 }**/ 941 // REVISIT: Remove this code. 942 // Store port value as string instead of integer. 943 try { 944 port = Integer.parseInt(portStr); 945 if (port == -1) --port; 946 } 947 catch (NumberFormatException nfe) { 948 port = -2; 949 } 950 } 951 } 952 } 953 954 if (isValidServerBasedAuthority(host, port, userinfo)) { 955 m_host = host; 956 m_port = port; 957 m_userinfo = userinfo; 958 return true; 959 } 960 // Note: Registry based authority is being removed from a 961 // new spec for URI which would obsolete RFC 2396. If the 962 // spec is added to XML errata, processing of reg_name 963 // needs to be removed. - mrglavas. 964 else if (isValidRegistryBasedAuthority(p_uriSpec)) { 965 m_regAuthority = p_uriSpec; 966 return true; 967 } 968 return false; 969 } 970 971 /** 972 * Determines whether the components host, port, and user info 973 * are valid as a server authority. 974 * 975 * @param host the host component of authority 976 * @param port the port number component of authority 977 * @param userinfo the user info component of authority 978 * 979 * @return true if the given host, port, and userinfo compose 980 * a valid server authority 981 */ 982 private boolean isValidServerBasedAuthority(String host, int port, String userinfo) { 983 984 // Check if the host is well formed. 985 if (!isWellFormedAddress(host)) { 986 return false; 987 } 988 989 // Check that port is well formed if it exists. 990 // REVISIT: There's no restriction on port value ranges, but 991 // perform the same check as in setPort to be consistent. Pass 992 // in a string to this method instead of an integer. 993 if (port < -1 || port > 65535) { 994 return false; 995 } 996 997 // Check that userinfo is well formed if it exists. 998 if (userinfo != null) { 999 // Userinfo can contain alphanumerics, mark characters, escaped 1000 // and ';',':','&','=','+','$',',' 1001 int index = 0; 1002 int end = userinfo.length(); 1003 char testChar = '\0'; 1004 while (index < end) { 1005 testChar = userinfo.charAt(index); 1006 if (testChar == '%') { 1007 if (index+2 >= end || 1008 !isHex(userinfo.charAt(index+1)) || 1009 !isHex(userinfo.charAt(index+2))) { 1010 return false; 1011 } 1012 index += 2; 1013 } 1014 else if (!isUserinfoCharacter(testChar)) { 1015 return false; 1016 } 1017 ++index; 1018 } 1019 } 1020 return true; 1021 } 1022 1023 /** 1024 * Determines whether the given string is a registry based authority. 1025 * 1026 * @param authority the authority component of a URI 1027 * 1028 * @return true if the given string is a registry based authority 1029 */ 1030 private boolean isValidRegistryBasedAuthority(String authority) { 1031 int index = 0; 1032 int end = authority.length(); 1033 char testChar; 1034 1035 while (index < end) { 1036 testChar = authority.charAt(index); 1037 1038 // check for valid escape sequence 1039 if (testChar == '%') { 1040 if (index+2 >= end || 1041 !isHex(authority.charAt(index+1)) || 1042 !isHex(authority.charAt(index+2))) { 1043 return false; 1044 } 1045 index += 2; 1046 } 1047 // can check against path characters because the set 1048 // is the same except for '/' which we've already excluded. 1049 else if (!isPathCharacter(testChar)) { 1050 return false; 1051 } 1052 ++index; 1053 } 1054 return true; 1055 } 1056 1057 /** 1058 * Initialize the path for this URI from a URI string spec. 1059 * 1060 * @param p_uriSpec the URI specification (cannot be null) 1061 * @param p_nStartIndex the index to begin scanning from 1062 * 1063 * @exception MalformedURIException if p_uriSpec violates syntax rules 1064 */ 1065 private void initializePath(String p_uriSpec, int p_nStartIndex) 1066 throws MalformedURIException { 1067 if (p_uriSpec == null) { 1068 throw new MalformedURIException( 1069 "Cannot initialize path from null string!"); 1070 } 1071 1072 int index = p_nStartIndex; 1073 int start = p_nStartIndex; 1074 int end = p_uriSpec.length(); 1075 char testChar = '\0'; 1076 1077 // path - everything up to query string or fragment 1078 if (start < end) { 1079 // RFC 2732 only allows '[' and ']' to appear in the opaque part. 1080 if (getScheme() == null || p_uriSpec.charAt(start) == '/') { 1081 1082 // Scan path. 1083 // abs_path = "/" path_segments 1084 // rel_path = rel_segment [ abs_path ] 1085 while (index < end) { 1086 testChar = p_uriSpec.charAt(index); 1087 1088 // check for valid escape sequence 1089 if (testChar == '%') { 1090 if (index+2 >= end || 1091 !isHex(p_uriSpec.charAt(index+1)) || 1092 !isHex(p_uriSpec.charAt(index+2))) { 1093 throw new MalformedURIException( 1094 "Path contains invalid escape sequence!"); 1095 } 1096 index += 2; 1097 } 1098 // Path segments cannot contain '[' or ']' since pchar 1099 // production was not changed by RFC 2732. 1100 else if (!isPathCharacter(testChar)) { 1101 if (testChar == '?' || testChar == '#') { 1102 break; 1103 } 1104 throw new MalformedURIException( 1105 "Path contains invalid character: " + testChar); 1106 } 1107 ++index; 1108 } 1109 } 1110 else { 1111 1112 // Scan opaque part. 1113 // opaque_part = uric_no_slash *uric 1114 while (index < end) { 1115 testChar = p_uriSpec.charAt(index); 1116 1117 if (testChar == '?' || testChar == '#') { 1118 break; 1119 } 1120 1121 // check for valid escape sequence 1122 if (testChar == '%') { 1123 if (index+2 >= end || 1124 !isHex(p_uriSpec.charAt(index+1)) || 1125 !isHex(p_uriSpec.charAt(index+2))) { 1126 throw new MalformedURIException( 1127 "Opaque part contains invalid escape sequence!"); 1128 } 1129 index += 2; 1130 } 1131 // If the scheme specific part is opaque, it can contain '[' 1132 // and ']'. uric_no_slash wasn't modified by RFC 2732, which 1133 // I've interpreted as an error in the spec, since the 1134 // production should be equivalent to (uric - '/'), and uric 1135 // contains '[' and ']'. - mrglavas 1136 else if (!isURICharacter(testChar)) { 1137 throw new MalformedURIException( 1138 "Opaque part contains invalid character: " + testChar); 1139 } 1140 ++index; 1141 } 1142 } 1143 } 1144 m_path = p_uriSpec.substring(start, index); 1145 1146 // query - starts with ? and up to fragment or end 1147 if (testChar == '?') { 1148 index++; 1149 start = index; 1150 while (index < end) { 1151 testChar = p_uriSpec.charAt(index); 1152 if (testChar == '#') { 1153 break; 1154 } 1155 if (testChar == '%') { 1156 if (index+2 >= end || 1157 !isHex(p_uriSpec.charAt(index+1)) || 1158 !isHex(p_uriSpec.charAt(index+2))) { 1159 throw new MalformedURIException( 1160 "Query string contains invalid escape sequence!"); 1161 } 1162 index += 2; 1163 } 1164 else if (!isURICharacter(testChar)) { 1165 throw new MalformedURIException( 1166 "Query string contains invalid character: " + testChar); 1167 } 1168 index++; 1169 } 1170 m_queryString = p_uriSpec.substring(start, index); 1171 } 1172 1173 // fragment - starts with # 1174 if (testChar == '#') { 1175 index++; 1176 start = index; 1177 while (index < end) { 1178 testChar = p_uriSpec.charAt(index); 1179 1180 if (testChar == '%') { 1181 if (index+2 >= end || 1182 !isHex(p_uriSpec.charAt(index+1)) || 1183 !isHex(p_uriSpec.charAt(index+2))) { 1184 throw new MalformedURIException( 1185 "Fragment contains invalid escape sequence!"); 1186 } 1187 index += 2; 1188 } 1189 else if (!isURICharacter(testChar)) { 1190 throw new MalformedURIException( 1191 "Fragment contains invalid character: "+testChar); 1192 } 1193 index++; 1194 } 1195 m_fragment = p_uriSpec.substring(start, index); 1196 } 1197 } 1198 1199 /** 1200 * Get the scheme for this URI. 1201 * 1202 * @return the scheme for this URI 1203 */ 1204 public String getScheme() { 1205 return m_scheme; 1206 } 1207 1208 /** 1209 * Get the scheme-specific part for this URI (everything following the 1210 * scheme and the first colon). See RFC 2396 Section 5.2 for spec. 1211 * 1212 * @return the scheme-specific part for this URI 1213 */ 1214 public String getSchemeSpecificPart() { 1215 StringBuffer schemespec = new StringBuffer(); 1216 1217 if (m_host != null || m_regAuthority != null) { 1218 schemespec.append("//"); 1219 1220 // Server based authority. 1221 if (m_host != null) { 1222 1223 if (m_userinfo != null) { 1224 schemespec.append(m_userinfo); 1225 schemespec.append('@'); 1226 } 1227 1228 schemespec.append(m_host); 1229 1230 if (m_port != -1) { 1231 schemespec.append(':'); 1232 schemespec.append(m_port); 1233 } 1234 } 1235 // Registry based authority. 1236 else { 1237 schemespec.append(m_regAuthority); 1238 } 1239 } 1240 1241 if (m_path != null) { 1242 schemespec.append((m_path)); 1243 } 1244 1245 if (m_queryString != null) { 1246 schemespec.append('?'); 1247 schemespec.append(m_queryString); 1248 } 1249 1250 if (m_fragment != null) { 1251 schemespec.append('#'); 1252 schemespec.append(m_fragment); 1253 } 1254 1255 return schemespec.toString(); 1256 } 1257 1258 /** 1259 * Get the userinfo for this URI. 1260 * 1261 * @return the userinfo for this URI (null if not specified). 1262 */ 1263 public String getUserinfo() { 1264 return m_userinfo; 1265 } 1266 1267 /** 1268 * Get the host for this URI. 1269 * 1270 * @return the host for this URI (null if not specified). 1271 */ 1272 public String getHost() { 1273 return m_host; 1274 } 1275 1276 /** 1277 * Get the port for this URI. 1278 * 1279 * @return the port for this URI (-1 if not specified). 1280 */ 1281 public int getPort() { 1282 return m_port; 1283 } 1284 1285 /** 1286 * Get the registry based authority for this URI. 1287 * 1288 * @return the registry based authority (null if not specified). 1289 */ 1290 public String getRegBasedAuthority() { 1291 return m_regAuthority; 1292 } 1293 1294 /** 1295 * Get the authority for this URI. 1296 * 1297 * @return the authority 1298 */ 1299 public String getAuthority() { 1300 StringBuffer authority = new StringBuffer(); 1301 if (m_host != null || m_regAuthority != null) { 1302 authority.append("//"); 1303 1304 // Server based authority. 1305 if (m_host != null) { 1306 1307 if (m_userinfo != null) { 1308 authority.append(m_userinfo); 1309 authority.append('@'); 1310 } 1311 1312 authority.append(m_host); 1313 1314 if (m_port != -1) { 1315 authority.append(':'); 1316 authority.append(m_port); 1317 } 1318 } 1319 // Registry based authority. 1320 else { 1321 authority.append(m_regAuthority); 1322 } 1323 } 1324 return authority.toString(); 1325 } 1326 1327 /** 1328 * Get the path for this URI (optionally with the query string and 1329 * fragment). 1330 * 1331 * @param p_includeQueryString if true (and query string is not null), 1332 * then a "?" followed by the query string 1333 * will be appended 1334 * @param p_includeFragment if true (and fragment is not null), 1335 * then a "#" followed by the fragment 1336 * will be appended 1337 * 1338 * @return the path for this URI possibly including the query string 1339 * and fragment 1340 */ 1341 public String getPath(boolean p_includeQueryString, 1342 boolean p_includeFragment) { 1343 StringBuffer pathString = new StringBuffer(m_path); 1344 1345 if (p_includeQueryString && m_queryString != null) { 1346 pathString.append('?'); 1347 pathString.append(m_queryString); 1348 } 1349 1350 if (p_includeFragment && m_fragment != null) { 1351 pathString.append('#'); 1352 pathString.append(m_fragment); 1353 } 1354 return pathString.toString(); 1355 } 1356 1357 /** 1358 * Get the path for this URI. Note that the value returned is the path 1359 * only and does not include the query string or fragment. 1360 * 1361 * @return the path for this URI. 1362 */ 1363 public String getPath() { 1364 return m_path; 1365 } 1366 1367 /** 1368 * Get the query string for this URI. 1369 * 1370 * @return the query string for this URI. Null is returned if there 1371 * was no "?" in the URI spec, empty string if there was a 1372 * "?" but no query string following it. 1373 */ 1374 public String getQueryString() { 1375 return m_queryString; 1376 } 1377 1378 /** 1379 * Get the fragment for this URI. 1380 * 1381 * @return the fragment for this URI. Null is returned if there 1382 * was no "#" in the URI spec, empty string if there was a 1383 * "#" but no fragment following it. 1384 */ 1385 public String getFragment() { 1386 return m_fragment; 1387 } 1388 1389 /** 1390 * Set the scheme for this URI. The scheme is converted to lowercase 1391 * before it is set. 1392 * 1393 * @param p_scheme the scheme for this URI (cannot be null) 1394 * 1395 * @exception MalformedURIException if p_scheme is not a conformant 1396 * scheme name 1397 */ 1398 public void setScheme(String p_scheme) throws MalformedURIException { 1399 if (p_scheme == null) { 1400 throw new MalformedURIException( 1401 "Cannot set scheme from null string!"); 1402 } 1403 if (!isConformantSchemeName(p_scheme)) { 1404 throw new MalformedURIException("The scheme is not conformant."); 1405 } 1406 1407 m_scheme = p_scheme.toLowerCase(); 1408 } 1409 1410 /** 1411 * Set the userinfo for this URI. If a non-null value is passed in and 1412 * the host value is null, then an exception is thrown. 1413 * 1414 * @param p_userinfo the userinfo for this URI 1415 * 1416 * @exception MalformedURIException if p_userinfo contains invalid 1417 * characters 1418 */ 1419 public void setUserinfo(String p_userinfo) throws MalformedURIException { 1420 if (p_userinfo == null) { 1421 m_userinfo = null; 1422 return; 1423 } 1424 else { 1425 if (m_host == null) { 1426 throw new MalformedURIException( 1427 "Userinfo cannot be set when host is null!"); 1428 } 1429 1430 // userinfo can contain alphanumerics, mark characters, escaped 1431 // and ';',':','&','=','+','$',',' 1432 int index = 0; 1433 int end = p_userinfo.length(); 1434 char testChar = '\0'; 1435 while (index < end) { 1436 testChar = p_userinfo.charAt(index); 1437 if (testChar == '%') { 1438 if (index+2 >= end || 1439 !isHex(p_userinfo.charAt(index+1)) || 1440 !isHex(p_userinfo.charAt(index+2))) { 1441 throw new MalformedURIException( 1442 "Userinfo contains invalid escape sequence!"); 1443 } 1444 } 1445 else if (!isUserinfoCharacter(testChar)) { 1446 throw new MalformedURIException( 1447 "Userinfo contains invalid character:"+testChar); 1448 } 1449 index++; 1450 } 1451 } 1452 m_userinfo = p_userinfo; 1453 } 1454 1455 /** 1456 * <p>Set the host for this URI. If null is passed in, the userinfo 1457 * field is also set to null and the port is set to -1.</p> 1458 * 1459 * <p>Note: This method overwrites registry based authority if it 1460 * previously existed in this URI.</p> 1461 * 1462 * @param p_host the host for this URI 1463 * 1464 * @exception MalformedURIException if p_host is not a valid IP 1465 * address or DNS hostname. 1466 */ 1467 public void setHost(String p_host) throws MalformedURIException { 1468 if (p_host == null || p_host.length() == 0) { 1469 if (p_host != null) { 1470 m_regAuthority = null; 1471 } 1472 m_host = p_host; 1473 m_userinfo = null; 1474 m_port = -1; 1475 return; 1476 } 1477 else if (!isWellFormedAddress(p_host)) { 1478 throw new MalformedURIException("Host is not a well formed address!"); 1479 } 1480 m_host = p_host; 1481 m_regAuthority = null; 1482 } 1483 1484 /** 1485 * Set the port for this URI. -1 is used to indicate that the port is 1486 * not specified, otherwise valid port numbers are between 0 and 65535. 1487 * If a valid port number is passed in and the host field is null, 1488 * an exception is thrown. 1489 * 1490 * @param p_port the port number for this URI 1491 * 1492 * @exception MalformedURIException if p_port is not -1 and not a 1493 * valid port number 1494 */ 1495 public void setPort(int p_port) throws MalformedURIException { 1496 if (p_port >= 0 && p_port <= 65535) { 1497 if (m_host == null) { 1498 throw new MalformedURIException( 1499 "Port cannot be set when host is null!"); 1500 } 1501 } 1502 else if (p_port != -1) { 1503 throw new MalformedURIException("Invalid port number!"); 1504 } 1505 m_port = p_port; 1506 } 1507 1508 /** 1509 * <p>Sets the registry based authority for this URI.</p> 1510 * 1511 * <p>Note: This method overwrites server based authority 1512 * if it previously existed in this URI.</p> 1513 * 1514 * @param authority the registry based authority for this URI 1515 * 1516 * @exception MalformedURIException it authority is not a 1517 * well formed registry based authority 1518 */ 1519 public void setRegBasedAuthority(String authority) 1520 throws MalformedURIException { 1521 1522 if (authority == null) { 1523 m_regAuthority = null; 1524 return; 1525 } 1526 // reg_name = 1*( unreserved | escaped | "$" | "," | 1527 // ";" | ":" | "@" | "&" | "=" | "+" ) 1528 else if (authority.length() < 1 || 1529 !isValidRegistryBasedAuthority(authority) || 1530 authority.indexOf('/') != -1) { 1531 throw new MalformedURIException("Registry based authority is not well formed."); 1532 } 1533 m_regAuthority = authority; 1534 m_host = null; 1535 m_userinfo = null; 1536 m_port = -1; 1537 } 1538 1539 /** 1540 * Set the path for this URI. If the supplied path is null, then the 1541 * query string and fragment are set to null as well. If the supplied 1542 * path includes a query string and/or fragment, these fields will be 1543 * parsed and set as well. Note that, for URIs following the "generic 1544 * URI" syntax, the path specified should start with a slash. 1545 * For URIs that do not follow the generic URI syntax, this method 1546 * sets the scheme-specific part. 1547 * 1548 * @param p_path the path for this URI (may be null) 1549 * 1550 * @exception MalformedURIException if p_path contains invalid 1551 * characters 1552 */ 1553 public void setPath(String p_path) throws MalformedURIException { 1554 if (p_path == null) { 1555 m_path = null; 1556 m_queryString = null; 1557 m_fragment = null; 1558 } 1559 else { 1560 initializePath(p_path, 0); 1561 } 1562 } 1563 1564 /** 1565 * Append to the end of the path of this URI. If the current path does 1566 * not end in a slash and the path to be appended does not begin with 1567 * a slash, a slash will be appended to the current path before the 1568 * new segment is added. Also, if the current path ends in a slash 1569 * and the new segment begins with a slash, the extra slash will be 1570 * removed before the new segment is appended. 1571 * 1572 * @param p_addToPath the new segment to be added to the current path 1573 * 1574 * @exception MalformedURIException if p_addToPath contains syntax 1575 * errors 1576 */ 1577 public void appendPath(String p_addToPath) 1578 throws MalformedURIException { 1579 if (p_addToPath == null || p_addToPath.trim().length() == 0) { 1580 return; 1581 } 1582 1583 if (!isURIString(p_addToPath)) { 1584 throw new MalformedURIException( 1585 "Path contains invalid character!"); 1586 } 1587 1588 if (m_path == null || m_path.trim().length() == 0) { 1589 if (p_addToPath.startsWith("/")) { 1590 m_path = p_addToPath; 1591 } 1592 else { 1593 m_path = "/" + p_addToPath; 1594 } 1595 } 1596 else if (m_path.endsWith("/")) { 1597 if (p_addToPath.startsWith("/")) { 1598 m_path = m_path.concat(p_addToPath.substring(1)); 1599 } 1600 else { 1601 m_path = m_path.concat(p_addToPath); 1602 } 1603 } 1604 else { 1605 if (p_addToPath.startsWith("/")) { 1606 m_path = m_path.concat(p_addToPath); 1607 } 1608 else { 1609 m_path = m_path.concat("/" + p_addToPath); 1610 } 1611 } 1612 } 1613 1614 /** 1615 * Set the query string for this URI. A non-null value is valid only 1616 * if this is an URI conforming to the generic URI syntax and 1617 * the path value is not null. 1618 * 1619 * @param p_queryString the query string for this URI 1620 * 1621 * @exception MalformedURIException if p_queryString is not null and this 1622 * URI does not conform to the generic 1623 * URI syntax or if the path is null 1624 */ 1625 public void setQueryString(String p_queryString) throws MalformedURIException { 1626 if (p_queryString == null) { 1627 m_queryString = null; 1628 } 1629 else if (!isGenericURI()) { 1630 throw new MalformedURIException( 1631 "Query string can only be set for a generic URI!"); 1632 } 1633 else if (getPath() == null) { 1634 throw new MalformedURIException( 1635 "Query string cannot be set when path is null!"); 1636 } 1637 else if (!isURIString(p_queryString)) { 1638 throw new MalformedURIException( 1639 "Query string contains invalid character!"); 1640 } 1641 else { 1642 m_queryString = p_queryString; 1643 } 1644 } 1645 1646 /** 1647 * Set the fragment for this URI. A non-null value is valid only 1648 * if this is a URI conforming to the generic URI syntax and 1649 * the path value is not null. 1650 * 1651 * @param p_fragment the fragment for this URI 1652 * 1653 * @exception MalformedURIException if p_fragment is not null and this 1654 * URI does not conform to the generic 1655 * URI syntax or if the path is null 1656 */ 1657 public void setFragment(String p_fragment) throws MalformedURIException { 1658 if (p_fragment == null) { 1659 m_fragment = null; 1660 } 1661 else if (!isGenericURI()) { 1662 throw new MalformedURIException( 1663 "Fragment can only be set for a generic URI!"); 1664 } 1665 else if (getPath() == null) { 1666 throw new MalformedURIException( 1667 "Fragment cannot be set when path is null!"); 1668 } 1669 else if (!isURIString(p_fragment)) { 1670 throw new MalformedURIException( 1671 "Fragment contains invalid character!"); 1672 } 1673 else { 1674 m_fragment = p_fragment; 1675 } 1676 } 1677 1678 /** 1679 * Determines if the passed-in Object is equivalent to this URI. 1680 * 1681 * @param p_test the Object to test for equality. 1682 * 1683 * @return true if p_test is a URI with all values equal to this 1684 * URI, false otherwise 1685 */ 1686 public boolean equals(Object p_test) { 1687 if (p_test instanceof URI) { 1688 URI testURI = (URI) p_test; 1689 if (((m_scheme == null && testURI.m_scheme == null) || 1690 (m_scheme != null && testURI.m_scheme != null && 1691 m_scheme.equals(testURI.m_scheme))) && 1692 ((m_userinfo == null && testURI.m_userinfo == null) || 1693 (m_userinfo != null && testURI.m_userinfo != null && 1694 m_userinfo.equals(testURI.m_userinfo))) && 1695 ((m_host == null && testURI.m_host == null) || 1696 (m_host != null && testURI.m_host != null && 1697 m_host.equals(testURI.m_host))) && 1698 m_port == testURI.m_port && 1699 ((m_path == null && testURI.m_path == null) || 1700 (m_path != null && testURI.m_path != null && 1701 m_path.equals(testURI.m_path))) && 1702 ((m_queryString == null && testURI.m_queryString == null) || 1703 (m_queryString != null && testURI.m_queryString != null && 1704 m_queryString.equals(testURI.m_queryString))) && 1705 ((m_fragment == null && testURI.m_fragment == null) || 1706 (m_fragment != null && testURI.m_fragment != null && 1707 m_fragment.equals(testURI.m_fragment)))) { 1708 return true; 1709 } 1710 } 1711 return false; 1712 } 1713 1714 /** 1715 * Get the URI as a string specification. See RFC 2396 Section 5.2. 1716 * 1717 * @return the URI string specification 1718 */ 1719 public String toString() { 1720 StringBuffer uriSpecString = new StringBuffer(); 1721 1722 if (m_scheme != null) { 1723 uriSpecString.append(m_scheme); 1724 uriSpecString.append(':'); 1725 } 1726 uriSpecString.append(getSchemeSpecificPart()); 1727 return uriSpecString.toString(); 1728 } 1729 1730 /** 1731 * Get the indicator as to whether this URI uses the "generic URI" 1732 * syntax. 1733 * 1734 * @return true if this URI uses the "generic URI" syntax, false 1735 * otherwise 1736 */ 1737 public boolean isGenericURI() { 1738 // presence of the host (whether valid or empty) means 1739 // double-slashes which means generic uri 1740 return (m_host != null); 1741 } 1742 1743 /** 1744 * Returns whether this URI represents an absolute URI. 1745 * 1746 * @return true if this URI represents an absolute URI, false 1747 * otherwise 1748 */ 1749 public boolean isAbsoluteURI() { 1750 // presence of the scheme means absolute uri 1751 return (m_scheme != null); 1752 } 1753 1754 /** 1755 * Determine whether a scheme conforms to the rules for a scheme name. 1756 * A scheme is conformant if it starts with an alphanumeric, and 1757 * contains only alphanumerics, '+','-' and '.'. 1758 * 1759 * @return true if the scheme is conformant, false otherwise 1760 */ 1761 public static boolean isConformantSchemeName(String p_scheme) { 1762 if (p_scheme == null || p_scheme.trim().length() == 0) { 1763 return false; 1764 } 1765 1766 if (!isAlpha(p_scheme.charAt(0))) { 1767 return false; 1768 } 1769 1770 char testChar; 1771 int schemeLength = p_scheme.length(); 1772 for (int i = 1; i < schemeLength; ++i) { 1773 testChar = p_scheme.charAt(i); 1774 if (!isSchemeCharacter(testChar)) { 1775 return false; 1776 } 1777 } 1778 1779 return true; 1780 } 1781 1782 /** 1783 * Determine whether a string is syntactically capable of representing 1784 * a valid IPv4 address, IPv6 reference or the domain name of a network host. 1785 * A valid IPv4 address consists of four decimal digit groups separated by a 1786 * '.'. Each group must consist of one to three digits. See RFC 2732 Section 3, 1787 * and RFC 2373 Section 2.2, for the definition of IPv6 references. A hostname 1788 * consists of domain labels (each of which must begin and end with an alphanumeric 1789 * but may contain '-') separated & by a '.'. See RFC 2396 Section 3.2.2. 1790 * 1791 * @return true if the string is a syntactically valid IPv4 address, 1792 * IPv6 reference or hostname 1793 */ 1794 public static boolean isWellFormedAddress(String address) { 1795 if (address == null) { 1796 return false; 1797 } 1798 1799 int addrLength = address.length(); 1800 if (addrLength == 0) { 1801 return false; 1802 } 1803 1804 // Check if the host is a valid IPv6reference. 1805 if (address.startsWith("[")) { 1806 return isWellFormedIPv6Reference(address); 1807 } 1808 1809 // Cannot start with a '.', '-', or end with a '-'. 1810 if (address.startsWith(".") || 1811 address.startsWith("-") || 1812 address.endsWith("-")) { 1813 return false; 1814 } 1815 1816 // rightmost domain label starting with digit indicates IP address 1817 // since top level domain label can only start with an alpha 1818 // see RFC 2396 Section 3.2.2 1819 int index = address.lastIndexOf('.'); 1820 if (address.endsWith(".")) { 1821 index = address.substring(0, index).lastIndexOf('.'); 1822 } 1823 1824 if (index+1 < addrLength && isDigit(address.charAt(index+1))) { 1825 return isWellFormedIPv4Address(address); 1826 } 1827 else { 1828 // hostname = *( domainlabel "." ) toplabel [ "." ] 1829 // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum 1830 // toplabel = alpha | alpha *( alphanum | "-" ) alphanum 1831 1832 // RFC 2396 states that hostnames take the form described in 1833 // RFC 1034 (Section 3) and RFC 1123 (Section 2.1). According 1834 // to RFC 1034, hostnames are limited to 255 characters. 1835 if (addrLength > 255) { 1836 return false; 1837 } 1838 1839 // domain labels can contain alphanumerics and '-" 1840 // but must start and end with an alphanumeric 1841 char testChar; 1842 int labelCharCount = 0; 1843 1844 for (int i = 0; i < addrLength; i++) { 1845 testChar = address.charAt(i); 1846 if (testChar == '.') { 1847 if (!isAlphanum(address.charAt(i-1))) { 1848 return false; 1849 } 1850 if (i+1 < addrLength && !isAlphanum(address.charAt(i+1))) { 1851 return false; 1852 } 1853 labelCharCount = 0; 1854 } 1855 else if (!isAlphanum(testChar) && testChar != '-') { 1856 return false; 1857 } 1858 // RFC 1034: Labels must be 63 characters or less. 1859 else if (++labelCharCount > 63) { 1860 return false; 1861 } 1862 } 1863 } 1864 return true; 1865 } 1866 1867 /** 1868 * <p>Determines whether a string is an IPv4 address as defined by 1869 * RFC 2373, and under the further constraint that it must be a 32-bit 1870 * address. Though not expressed in the grammar, in order to satisfy 1871 * the 32-bit address constraint, each segment of the address cannot 1872 * be greater than 255 (8 bits of information).</p> 1873 * 1874 * <p><code>IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT</code></p> 1875 * 1876 * @return true if the string is a syntactically valid IPv4 address 1877 */ 1878 public static boolean isWellFormedIPv4Address(String address) { 1879 1880 int addrLength = address.length(); 1881 char testChar; 1882 int numDots = 0; 1883 int numDigits = 0; 1884 1885 // make sure that 1) we see only digits and dot separators, 2) that 1886 // any dot separator is preceded and followed by a digit and 1887 // 3) that we find 3 dots 1888 // 1889 // RFC 2732 amended RFC 2396 by replacing the definition 1890 // of IPv4address with the one defined by RFC 2373. - mrglavas 1891 // 1892 // IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT 1893 // 1894 // One to three digits must be in each segment. 1895 for (int i = 0; i < addrLength; i++) { 1896 testChar = address.charAt(i); 1897 if (testChar == '.') { 1898 if ((i > 0 && !isDigit(address.charAt(i-1))) || 1899 (i+1 < addrLength && !isDigit(address.charAt(i+1)))) { 1900 return false; 1901 } 1902 numDigits = 0; 1903 if (++numDots > 3) { 1904 return false; 1905 } 1906 } 1907 else if (!isDigit(testChar)) { 1908 return false; 1909 } 1910 // Check that that there are no more than three digits 1911 // in this segment. 1912 else if (++numDigits > 3) { 1913 return false; 1914 } 1915 // Check that this segment is not greater than 255. 1916 else if (numDigits == 3) { 1917 char first = address.charAt(i-2); 1918 char second = address.charAt(i-1); 1919 if (!(first < '2' || 1920 (first == '2' && 1921 (second < '5' || 1922 (second == '5' && testChar <= '5'))))) { 1923 return false; 1924 } 1925 } 1926 } 1927 return (numDots == 3); 1928 } 1929 1930 /** 1931 * <p>Determines whether a string is an IPv6 reference as defined 1932 * by RFC 2732, where IPv6address is defined in RFC 2373. The 1933 * IPv6 address is parsed according to Section 2.2 of RFC 2373, 1934 * with the additional constraint that the address be composed of 1935 * 128 bits of information.</p> 1936 * 1937 * <p><code>IPv6reference = "[" IPv6address "]"</code></p> 1938 * 1939 * <p>Note: The BNF expressed in RFC 2373 Appendix B does not 1940 * accurately describe section 2.2, and was in fact removed from 1941 * RFC 3513, the successor of RFC 2373.</p> 1942 * 1943 * @return true if the string is a syntactically valid IPv6 reference 1944 */ 1945 public static boolean isWellFormedIPv6Reference(String address) { 1946 1947 int addrLength = address.length(); 1948 int index = 1; 1949 int end = addrLength-1; 1950 1951 // Check if string is a potential match for IPv6reference. 1952 if (!(addrLength > 2 && address.charAt(0) == '[' 1953 && address.charAt(end) == ']')) { 1954 return false; 1955 } 1956 1957 // Counter for the number of 16-bit sections read in the address. 1958 int [] counter = new int[1]; 1959 1960 // Scan hex sequence before possible '::' or IPv4 address. 1961 index = scanHexSequence(address, index, end, counter); 1962 if (index == -1) { 1963 return false; 1964 } 1965 // Address must contain 128-bits of information. 1966 else if (index == end) { 1967 return (counter[0] == 8); 1968 } 1969 1970 if (index+1 < end && address.charAt(index) == ':') { 1971 if (address.charAt(index+1) == ':') { 1972 // '::' represents at least one 16-bit group of zeros. 1973 if (++counter[0] > 8) { 1974 return false; 1975 } 1976 index += 2; 1977 // Trailing zeros will fill out the rest of the address. 1978 if (index == end) { 1979 return true; 1980 } 1981 } 1982 // If the second character wasn't ':', in order to be valid, 1983 // the remainder of the string must match IPv4Address, 1984 // and we must have read exactly 6 16-bit groups. 1985 else { 1986 return (counter[0] == 6) && 1987 isWellFormedIPv4Address(address.substring(index+1, end)); 1988 } 1989 } 1990 else { 1991 return false; 1992 } 1993 1994 // 3. Scan hex sequence after '::'. 1995 int prevCount = counter[0]; 1996 index = scanHexSequence(address, index, end, counter); 1997 1998 // We've either reached the end of the string, the address ends in 1999 // an IPv4 address, or it is invalid. scanHexSequence has already 2000 // made sure that we have the right number of bits. 2001 return (index == end) || 2002 (index != -1 && isWellFormedIPv4Address( 2003 address.substring((counter[0] > prevCount) ? index+1 : index, end))); 2004 } 2005 2006 /** 2007 * Helper method for isWellFormedIPv6Reference which scans the 2008 * hex sequences of an IPv6 address. It returns the index of the 2009 * next character to scan in the address, or -1 if the string 2010 * cannot match a valid IPv6 address. 2011 * 2012 * @param address the string to be scanned 2013 * @param index the beginning index (inclusive) 2014 * @param end the ending index (exclusive) 2015 * @param counter a counter for the number of 16-bit sections read 2016 * in the address 2017 * 2018 * @return the index of the next character to scan, or -1 if the 2019 * string cannot match a valid IPv6 address 2020 */ 2021 private static int scanHexSequence (String address, int index, int end, int [] counter) { 2022 2023 char testChar; 2024 int numDigits = 0; 2025 int start = index; 2026 2027 // Trying to match the following productions: 2028 // hexseq = hex4 *( ":" hex4) 2029 // hex4 = 1*4HEXDIG 2030 for (; index < end; ++index) { 2031 testChar = address.charAt(index); 2032 if (testChar == ':') { 2033 // IPv6 addresses are 128-bit, so there can be at most eight sections. 2034 if (numDigits > 0 && ++counter[0] > 8) { 2035 return -1; 2036 } 2037 // This could be '::'. 2038 if (numDigits == 0 || ((index+1 < end) && address.charAt(index+1) == ':')) { 2039 return index; 2040 } 2041 numDigits = 0; 2042 } 2043 // This might be invalid or an IPv4address. If it's potentially an IPv4address, 2044 // backup to just after the last valid character that matches hexseq. 2045 else if (!isHex(testChar)) { 2046 if (testChar == '.' && numDigits < 4 && numDigits > 0 && counter[0] <= 6) { 2047 int back = index - numDigits - 1; 2048 return (back >= start) ? back : (back+1); 2049 } 2050 return -1; 2051 } 2052 // There can be at most 4 hex digits per group. 2053 else if (++numDigits > 4) { 2054 return -1; 2055 } 2056 } 2057 return (numDigits > 0 && ++counter[0] <= 8) ? end : -1; 2058 } 2059 2060 2061 /** 2062 * Determine whether a char is a digit. 2063 * 2064 * @return true if the char is betweeen '0' and '9', false otherwise 2065 */ 2066 private static boolean isDigit(char p_char) { 2067 return p_char >= '0' && p_char <= '9'; 2068 } 2069 2070 /** 2071 * Determine whether a character is a hexadecimal character. 2072 * 2073 * @return true if the char is betweeen '0' and '9', 'a' and 'f' 2074 * or 'A' and 'F', false otherwise 2075 */ 2076 private static boolean isHex(char p_char) { 2077 return (p_char <= 'f' && (fgLookupTable[p_char] & ASCII_HEX_CHARACTERS) != 0); 2078 } 2079 2080 /** 2081 * Determine whether a char is an alphabetic character: a-z or A-Z 2082 * 2083 * @return true if the char is alphabetic, false otherwise 2084 */ 2085 private static boolean isAlpha(char p_char) { 2086 return ((p_char >= 'a' && p_char <= 'z') || (p_char >= 'A' && p_char <= 'Z' )); 2087 } 2088 2089 /** 2090 * Determine whether a char is an alphanumeric: 0-9, a-z or A-Z 2091 * 2092 * @return true if the char is alphanumeric, false otherwise 2093 */ 2094 private static boolean isAlphanum(char p_char) { 2095 return (p_char <= 'z' && (fgLookupTable[p_char] & MASK_ALPHA_NUMERIC) != 0); 2096 } 2097 2098 /** 2099 * Determine whether a character is a reserved character: 2100 * ';', '/', '?', ':', '@', '&', '=', '+', '$', ',', '[', or ']' 2101 * 2102 * @return true if the string contains any reserved characters 2103 */ 2104 private static boolean isReservedCharacter(char p_char) { 2105 return (p_char <= ']' && (fgLookupTable[p_char] & RESERVED_CHARACTERS) != 0); 2106 } 2107 2108 /** 2109 * Determine whether a char is an unreserved character. 2110 * 2111 * @return true if the char is unreserved, false otherwise 2112 */ 2113 private static boolean isUnreservedCharacter(char p_char) { 2114 return (p_char <= '~' && (fgLookupTable[p_char] & MASK_UNRESERVED_MASK) != 0); 2115 } 2116 2117 /** 2118 * Determine whether a char is a URI character (reserved or 2119 * unreserved, not including '%' for escaped octets). 2120 * 2121 * @return true if the char is a URI character, false otherwise 2122 */ 2123 private static boolean isURICharacter (char p_char) { 2124 return (p_char <= '~' && (fgLookupTable[p_char] & MASK_URI_CHARACTER) != 0); 2125 } 2126 2127 /** 2128 * Determine whether a char is a scheme character. 2129 * 2130 * @return true if the char is a scheme character, false otherwise 2131 */ 2132 private static boolean isSchemeCharacter (char p_char) { 2133 return (p_char <= 'z' && (fgLookupTable[p_char] & MASK_SCHEME_CHARACTER) != 0); 2134 } 2135 2136 /** 2137 * Determine whether a char is a userinfo character. 2138 * 2139 * @return true if the char is a userinfo character, false otherwise 2140 */ 2141 private static boolean isUserinfoCharacter (char p_char) { 2142 return (p_char <= 'z' && (fgLookupTable[p_char] & MASK_USERINFO_CHARACTER) != 0); 2143 } 2144 2145 /** 2146 * Determine whether a char is a path character. 2147 * 2148 * @return true if the char is a path character, false otherwise 2149 */ 2150 private static boolean isPathCharacter (char p_char) { 2151 return (p_char <= '~' && (fgLookupTable[p_char] & MASK_PATH_CHARACTER) != 0); 2152 } 2153 2154 2155 /** 2156 * Determine whether a given string contains only URI characters (also 2157 * called "uric" in RFC 2396). uric consist of all reserved 2158 * characters, unreserved characters and escaped characters. 2159 * 2160 * @return true if the string is comprised of uric, false otherwise 2161 */ 2162 private static boolean isURIString(String p_uric) { 2163 if (p_uric == null) { 2164 return false; 2165 } 2166 int end = p_uric.length(); 2167 char testChar = '\0'; 2168 for (int i = 0; i < end; i++) { 2169 testChar = p_uric.charAt(i); 2170 if (testChar == '%') { 2171 if (i+2 >= end || 2172 !isHex(p_uric.charAt(i+1)) || 2173 !isHex(p_uric.charAt(i+2))) { 2174 return false; 2175 } 2176 else { 2177 i += 2; 2178 continue; 2179 } 2180 } 2181 if (isURICharacter(testChar)) { 2182 continue; 2183 } 2184 else { 2185 return false; 2186 } 2187 } 2188 return true; 2189 } 2190 }