1 /* 2 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package com.sun.xml.internal.messaging.saaj.util; 27 28 // Imported from: org.apache.xerces.util 29 // Needed to work around differences in JDK1.2 and 1.3 and deal with userInfo 30 31 import java.io.IOException; 32 import java.io.Serializable; 33 34 35 /********************************************************************** 36 * A class to represent a Uniform Resource Identifier (URI). This class 37 * is designed to handle the parsing of URIs and provide access to 38 * the various components (scheme, host, port, userinfo, path, query 39 * string and fragment) that may constitute a URI. 40 * <p> 41 * Parsing of a URI specification is done according to the URI 42 * syntax described in RFC 2396 43 * <http://www.ietf.org/rfc/rfc2396.txt?number=2396>. Every URI consists 44 * of a scheme, followed by a colon (':'), followed by a scheme-specific 45 * part. For URIs that follow the "generic URI" syntax, the scheme- 46 * specific part begins with two slashes ("//") and may be followed 47 * by an authority segment (comprised of user information, host, and 48 * port), path segment, query segment and fragment. Note that RFC 2396 49 * no longer specifies the use of the parameters segment and excludes 50 * the "user:password" syntax as part of the authority segment. If 51 * "user:password" appears in a URI, the entire user/password string 52 * is stored as userinfo. 53 * <p> 54 * For URIs that do not follow the "generic URI" syntax (e.g. mailto), 55 * the entire scheme-specific part is treated as the "path" portion 56 * of the URI. 57 * <p> 58 * Note that, unlike the java.net.URL class, this class does not provide 59 * any built-in network access functionality nor does it provide any 60 * scheme-specific functionality (for example, it does not know a 61 * default port for a specific scheme). Rather, it only knows the 62 * grammar and basic set of operations that can be applied to a URI. 63 * 64 * @version 65 * 66 **********************************************************************/ 67 public class JaxmURI implements Serializable { 68 69 /******************************************************************* 70 * MalformedURIExceptions are thrown in the process of building a URI 71 * or setting fields on a URI when an operation would result in an 72 * invalid URI specification. 73 * 74 ********************************************************************/ 75 public static class MalformedURIException extends IOException { 76 77 /****************************************************************** 78 * Constructs a <code>MalformedURIException</code> with no specified 79 * detail message. 80 ******************************************************************/ 81 public MalformedURIException() { 82 super(); 83 } 84 85 /***************************************************************** 86 * Constructs a <code>MalformedURIException</code> with the 87 * specified detail message. 88 * 89 * @param p_msg the detail message. 90 ******************************************************************/ 91 public MalformedURIException(String p_msg) { 92 super(p_msg); 93 } 94 } 95 96 /** reserved characters */ 97 private static final String RESERVED_CHARACTERS = ";/?:@&=+$,"; 98 99 /** URI punctuation mark characters - these, combined with 100 alphanumerics, constitute the "unreserved" characters */ 101 private static final String MARK_CHARACTERS = "-_.!~*'() "; 102 103 /** scheme can be composed of alphanumerics and these characters */ 104 private static final String SCHEME_CHARACTERS = "+-."; 105 106 /** userinfo can be composed of unreserved, escaped and these 107 characters */ 108 private static final String USERINFO_CHARACTERS = ";:&=+$,"; 109 110 /** Stores the scheme (usually the protocol) for this URI. */ 111 private String m_scheme = null; 112 113 /** If specified, stores the userinfo for this URI; otherwise null */ 114 private String m_userinfo = null; 115 116 /** If specified, stores the host for this URI; otherwise null */ 117 private String m_host = null; 118 119 /** If specified, stores the port for this URI; otherwise -1 */ 120 private int m_port = -1; 121 122 /** If specified, stores the path for this URI; otherwise null */ 123 private String m_path = null; 124 125 /** If specified, stores the query string for this URI; otherwise 126 null. */ 127 private String m_queryString = null; 128 129 /** If specified, stores the fragment for this URI; otherwise null */ 130 private String m_fragment = null; 131 132 /** 133 * Construct a new and uninitialized URI. 134 */ 135 public JaxmURI() { 136 } 137 138 /** 139 * Construct a new URI from another URI. All fields for this URI are 140 * set equal to the fields of the URI passed in. 141 * 142 * @param p_other the URI to copy (cannot be null) 143 */ 144 public JaxmURI(JaxmURI p_other) { 145 initialize(p_other); 146 } 147 148 /** 149 * Construct a new URI from a URI specification string. If the 150 * specification follows the "generic URI" syntax, (two slashes 151 * following the first colon), the specification will be parsed 152 * accordingly - setting the scheme, userinfo, host,port, path, query 153 * string and fragment fields as necessary. If the specification does 154 * not follow the "generic URI" syntax, the specification is parsed 155 * into a scheme and scheme-specific part (stored as the path) only. 156 * 157 * @param p_uriSpec the URI specification string (cannot be null or 158 * empty) 159 * 160 * @exception MalformedURIException if p_uriSpec violates any syntax 161 * rules 162 */ 163 public JaxmURI(String p_uriSpec) throws MalformedURIException { 164 this((JaxmURI)null, p_uriSpec); 165 } 166 167 /** 168 * Construct a new URI from a base URI and a URI specification string. 169 * The URI specification string may be a relative URI. 170 * 171 * @param p_base the base URI (cannot be null if p_uriSpec is null or 172 * empty) 173 * @param p_uriSpec the URI specification string (cannot be null or 174 * empty if p_base is null) 175 * 176 * @exception MalformedURIException if p_uriSpec violates any syntax 177 * rules 178 */ 179 public JaxmURI(JaxmURI p_base, String p_uriSpec) throws MalformedURIException { 180 initialize(p_base, p_uriSpec); 181 } 182 183 /** 184 * Construct a new URI that does not follow the generic URI syntax. 185 * Only the scheme and scheme-specific part (stored as the path) are 186 * initialized. 187 * 188 * @param p_scheme the URI scheme (cannot be null or empty) 189 * @param p_schemeSpecificPart the scheme-specific part (cannot be 190 * null or empty) 191 * 192 * @exception MalformedURIException if p_scheme violates any 193 * syntax rules 194 */ 195 public JaxmURI(String p_scheme, String p_schemeSpecificPart) 196 throws MalformedURIException { 197 if (p_scheme == null || p_scheme.trim().length() == 0) { 198 throw new MalformedURIException( 199 "Cannot construct URI with null/empty scheme!"); 200 } 201 if (p_schemeSpecificPart == null || 202 p_schemeSpecificPart.trim().length() == 0) { 203 throw new MalformedURIException( 204 "Cannot construct URI with null/empty scheme-specific part!"); 205 } 206 setScheme(p_scheme); 207 setPath(p_schemeSpecificPart); 208 } 209 210 /** 211 * Construct a new URI that follows the generic URI syntax from its 212 * component parts. Each component is validated for syntax and some 213 * basic semantic checks are performed as well. See the individual 214 * setter methods for specifics. 215 * 216 * @param p_scheme the URI scheme (cannot be null or empty) 217 * @param p_host the hostname or IPv4 address for the URI 218 * @param p_path the URI path - if the path contains '?' or '#', 219 * then the query string and/or fragment will be 220 * set from the path; however, if the query and 221 * fragment are specified both in the path and as 222 * separate parameters, an exception is thrown 223 * @param p_queryString the URI query string (cannot be specified 224 * if path is null) 225 * @param p_fragment the URI fragment (cannot be specified if path 226 * is null) 227 * 228 * @exception MalformedURIException if any of the parameters violates 229 * syntax rules or semantic rules 230 */ 231 public JaxmURI(String p_scheme, String p_host, String p_path, 232 String p_queryString, String p_fragment) 233 throws MalformedURIException { 234 this(p_scheme, null, p_host, -1, p_path, p_queryString, p_fragment); 235 } 236 237 /** 238 * Construct a new URI that follows the generic URI syntax from its 239 * component parts. Each component is validated for syntax and some 240 * basic semantic checks are performed as well. See the individual 241 * setter methods for specifics. 242 * 243 * @param p_scheme the URI scheme (cannot be null or empty) 244 * @param p_userinfo the URI userinfo (cannot be specified if host 245 * is null) 246 * @param p_host the hostname or IPv4 address for the URI 247 * @param p_port the URI port (may be -1 for "unspecified"; cannot 248 * be specified if host is null) 249 * @param p_path the URI path - if the path contains '?' or '#', 250 * then the query string and/or fragment will be 251 * set from the path; however, if the query and 252 * fragment are specified both in the path and as 253 * separate parameters, an exception is thrown 254 * @param p_queryString the URI query string (cannot be specified 255 * if path is null) 256 * @param p_fragment the URI fragment (cannot be specified if path 257 * is null) 258 * 259 * @exception MalformedURIException if any of the parameters violates 260 * syntax rules or semantic rules 261 */ 262 public JaxmURI(String p_scheme, String p_userinfo, 263 String p_host, int p_port, String p_path, 264 String p_queryString, String p_fragment) 265 throws MalformedURIException { 266 if (p_scheme == null || p_scheme.trim().length() == 0) { 267 throw new MalformedURIException("Scheme is required!"); 268 } 269 270 if (p_host == null) { 271 if (p_userinfo != null) { 272 throw new MalformedURIException( 273 "Userinfo may not be specified if host is not specified!"); 274 } 275 if (p_port != -1) { 276 throw new MalformedURIException( 277 "Port may not be specified if host is not specified!"); 278 } 279 } 280 281 if (p_path != null) { 282 if (p_path.indexOf('?') != -1 && p_queryString != null) { 283 throw new MalformedURIException( 284 "Query string cannot be specified in path and query string!"); 285 } 286 287 if (p_path.indexOf('#') != -1 && p_fragment != null) { 288 throw new MalformedURIException( 289 "Fragment cannot be specified in both the path and fragment!"); 290 } 291 } 292 293 setScheme(p_scheme); 294 setHost(p_host); 295 setPort(p_port); 296 setUserinfo(p_userinfo); 297 setPath(p_path); 298 setQueryString(p_queryString); 299 setFragment(p_fragment); 300 } 301 302 /** 303 * Initialize all fields of this URI from another URI. 304 * 305 * @param p_other the URI to copy (cannot be null) 306 */ 307 private void initialize(JaxmURI p_other) { 308 m_scheme = p_other.getScheme(); 309 m_userinfo = p_other.getUserinfo(); 310 m_host = p_other.getHost(); 311 m_port = p_other.getPort(); 312 m_path = p_other.getPath(); 313 m_queryString = p_other.getQueryString(); 314 m_fragment = p_other.getFragment(); 315 } 316 317 /** 318 * Initializes this URI from a base URI and a URI specification string. 319 * See RFC 2396 Section 4 and Appendix B for specifications on parsing 320 * the URI and Section 5 for specifications on resolving relative URIs 321 * and relative paths. 322 * 323 * @param p_base the base URI (may be null if p_uriSpec is an absolute 324 * URI) 325 * @param p_uriSpec the URI spec string which may be an absolute or 326 * relative URI (can only be null/empty if p_base 327 * is not null) 328 * 329 * @exception MalformedURIException if p_base is null and p_uriSpec 330 * is not an absolute URI or if 331 * p_uriSpec violates syntax rules 332 */ 333 private void initialize(JaxmURI p_base, String p_uriSpec) 334 throws MalformedURIException { 335 if (p_base == null && 336 (p_uriSpec == null || p_uriSpec.trim().length() == 0)) { 337 throw new MalformedURIException( 338 "Cannot initialize URI with empty parameters."); 339 } 340 341 // just make a copy of the base if spec is empty 342 if (p_uriSpec == null || p_uriSpec.trim().length() == 0) { 343 initialize(p_base); 344 return; 345 } 346 347 String uriSpec = p_uriSpec.trim(); 348 int uriSpecLen = uriSpec.length(); 349 int index = 0; 350 351 // Check for scheme, which must be before `/'. Also handle names with 352 // DOS drive letters ('D:'), so 1-character schemes are not allowed. 353 int colonIdx = uriSpec.indexOf(':'); 354 int slashIdx = uriSpec.indexOf('/'); 355 if ((colonIdx < 2) || (colonIdx > slashIdx && slashIdx != -1)) { 356 int fragmentIdx = uriSpec.indexOf('#'); 357 // A standalone base is a valid URI according to spec 358 if (p_base == null && fragmentIdx != 0 ) { 359 throw new MalformedURIException("No scheme found in URI."); 360 } 361 } 362 else { 363 initializeScheme(uriSpec); 364 index = m_scheme.length()+1; 365 } 366 367 // two slashes means generic URI syntax, so we get the authority 368 if (((index+1) < uriSpecLen) && 369 (uriSpec.substring(index).startsWith("//"))) { 370 index += 2; 371 int startPos = index; 372 373 // get authority - everything up to path, query or fragment 374 char testChar = '\0'; 375 while (index < uriSpecLen) { 376 testChar = uriSpec.charAt(index); 377 if (testChar == '/' || testChar == '?' || testChar == '#') { 378 break; 379 } 380 index++; 381 } 382 383 // if we found authority, parse it out, otherwise we set the 384 // host to empty string 385 if (index > startPos) { 386 initializeAuthority(uriSpec.substring(startPos, index)); 387 } 388 else { 389 m_host = ""; 390 } 391 } 392 393 initializePath(uriSpec.substring(index)); 394 395 // Resolve relative URI to base URI - see RFC 2396 Section 5.2 396 // In some cases, it might make more sense to throw an exception 397 // (when scheme is specified is the string spec and the base URI 398 // is also specified, for example), but we're just following the 399 // RFC specifications 400 if (p_base != null) { 401 402 // check to see if this is the current doc - RFC 2396 5.2 #2 403 // note that this is slightly different from the RFC spec in that 404 // we don't include the check for query string being null 405 // - this handles cases where the urispec is just a query 406 // string or a fragment (e.g. "?y" or "#s") - 407 // see <http://www.ics.uci.edu/~fielding/url/test1.html> which 408 // identified this as a bug in the RFC 409 if (m_path.length() == 0 && m_scheme == null && 410 m_host == null) { 411 m_scheme = p_base.getScheme(); 412 m_userinfo = p_base.getUserinfo(); 413 m_host = p_base.getHost(); 414 m_port = p_base.getPort(); 415 m_path = p_base.getPath(); 416 417 if (m_queryString == null) { 418 m_queryString = p_base.getQueryString(); 419 } 420 return; 421 } 422 423 // check for scheme - RFC 2396 5.2 #3 424 // if we found a scheme, it means absolute URI, so we're done 425 if (m_scheme == null) { 426 m_scheme = p_base.getScheme(); 427 } 428 else { 429 return; 430 } 431 432 // check for authority - RFC 2396 5.2 #4 433 // if we found a host, then we've got a network path, so we're done 434 if (m_host == null) { 435 m_userinfo = p_base.getUserinfo(); 436 m_host = p_base.getHost(); 437 m_port = p_base.getPort(); 438 } 439 else { 440 return; 441 } 442 443 // check for absolute path - RFC 2396 5.2 #5 444 if (m_path.length() > 0 && 445 m_path.startsWith("/")) { 446 return; 447 } 448 449 // if we get to this point, we need to resolve relative path 450 // RFC 2396 5.2 #6 451 String path = ""; 452 String basePath = p_base.getPath(); 453 454 // 6a - get all but the last segment of the base URI path 455 if (basePath != null) { 456 int lastSlash = basePath.lastIndexOf('/'); 457 if (lastSlash != -1) { 458 path = basePath.substring(0, lastSlash+1); 459 } 460 } 461 462 // 6b - append the relative URI path 463 path = path.concat(m_path); 464 465 // 6c - remove all "./" where "." is a complete path segment 466 index = -1; 467 while ((index = path.indexOf("/./")) != -1) { 468 path = path.substring(0, index+1).concat(path.substring(index+3)); 469 } 470 471 // 6d - remove "." if path ends with "." as a complete path segment 472 if (path.endsWith("/.")) { 473 path = path.substring(0, path.length()-1); 474 } 475 476 // 6e - remove all "<segment>/../" where "<segment>" is a complete 477 // path segment not equal to ".." 478 index = 1; 479 int segIndex = -1; 480 String tempString = null; 481 482 while ((index = path.indexOf("/../", index)) > 0) { 483 tempString = path.substring(0, path.indexOf("/../")); 484 segIndex = tempString.lastIndexOf('/'); 485 if (segIndex != -1) { 486 if (!tempString.substring(segIndex++).equals("..")) { 487 path = path.substring(0, segIndex).concat(path.substring(index+4)); 488 } 489 else 490 index += 4; 491 } 492 else 493 index += 4; 494 } 495 496 // 6f - remove ending "<segment>/.." where "<segment>" is a 497 // complete path segment 498 if (path.endsWith("/..")) { 499 tempString = path.substring(0, path.length()-3); 500 segIndex = tempString.lastIndexOf('/'); 501 if (segIndex != -1) { 502 path = path.substring(0, segIndex+1); 503 } 504 } 505 m_path = path; 506 } 507 } 508 509 /** 510 * Initialize the scheme for this URI from a URI string spec. 511 * 512 * @param p_uriSpec the URI specification (cannot be null) 513 * 514 * @exception MalformedURIException if URI does not have a conformant 515 * scheme 516 */ 517 private void initializeScheme(String p_uriSpec) 518 throws MalformedURIException { 519 int uriSpecLen = p_uriSpec.length(); 520 int index = 0; 521 String scheme = null; 522 char testChar = '\0'; 523 524 while (index < uriSpecLen) { 525 testChar = p_uriSpec.charAt(index); 526 if (testChar == ':' || testChar == '/' || 527 testChar == '?' || testChar == '#') { 528 break; 529 } 530 index++; 531 } 532 scheme = p_uriSpec.substring(0, index); 533 534 if (scheme.length() == 0) { 535 throw new MalformedURIException("No scheme found in URI."); 536 } 537 else { 538 setScheme(scheme); 539 } 540 } 541 542 /** 543 * Initialize the authority (userinfo, host and port) for this 544 * URI from a URI string spec. 545 * 546 * @param p_uriSpec the URI specification (cannot be null) 547 * 548 * @exception MalformedURIException if p_uriSpec violates syntax rules 549 */ 550 private void initializeAuthority(String p_uriSpec) 551 throws MalformedURIException { 552 int index = 0; 553 int start = 0; 554 int end = p_uriSpec.length(); 555 char testChar = '\0'; 556 String userinfo = null; 557 558 // userinfo is everything up @ 559 if (p_uriSpec.indexOf('@', start) != -1) { 560 while (index < end) { 561 testChar = p_uriSpec.charAt(index); 562 if (testChar == '@') { 563 break; 564 } 565 index++; 566 } 567 userinfo = p_uriSpec.substring(start, index); 568 index++; 569 } 570 571 // host is everything up to ':' 572 String host = null; 573 start = index; 574 while (index < end) { 575 testChar = p_uriSpec.charAt(index); 576 if (testChar == ':') { 577 break; 578 } 579 index++; 580 } 581 host = p_uriSpec.substring(start, index); 582 int port = -1; 583 if (host.length() > 0) { 584 // port 585 if (testChar == ':') { 586 index++; 587 start = index; 588 while (index < end) { 589 index++; 590 } 591 String portStr = p_uriSpec.substring(start, index); 592 if (portStr.length() > 0) { 593 for (int i = 0; i < portStr.length(); i++) { 594 if (!isDigit(portStr.charAt(i))) { 595 throw new MalformedURIException( 596 portStr + 597 " is invalid. Port should only contain digits!"); 598 } 599 } 600 try { 601 port = Integer.parseInt(portStr); 602 } 603 catch (NumberFormatException nfe) { 604 // can't happen 605 } 606 } 607 } 608 } 609 setHost(host); 610 setPort(port); 611 setUserinfo(userinfo); 612 } 613 614 /** 615 * Initialize the path for this URI from a URI string spec. 616 * 617 * @param p_uriSpec the URI specification (cannot be null) 618 * 619 * @exception MalformedURIException if p_uriSpec violates syntax rules 620 */ 621 private void initializePath(String p_uriSpec) 622 throws MalformedURIException { 623 if (p_uriSpec == null) { 624 throw new MalformedURIException( 625 "Cannot initialize path from null string!"); 626 } 627 628 int index = 0; 629 int start = 0; 630 int end = p_uriSpec.length(); 631 char testChar = '\0'; 632 633 // path - everything up to query string or fragment 634 while (index < end) { 635 testChar = p_uriSpec.charAt(index); 636 if (testChar == '?' || testChar == '#') { 637 break; 638 } 639 // check for valid escape sequence 640 if (testChar == '%') { 641 if (index+2 >= end || 642 !isHex(p_uriSpec.charAt(index+1)) || 643 !isHex(p_uriSpec.charAt(index+2))) { 644 throw new MalformedURIException( 645 "Path contains invalid escape sequence!"); 646 } 647 } 648 else if (!isReservedCharacter(testChar) && 649 !isUnreservedCharacter(testChar)) { 650 throw new MalformedURIException( 651 "Path contains invalid character: " + testChar); 652 } 653 index++; 654 } 655 m_path = p_uriSpec.substring(start, index); 656 657 // query - starts with ? and up to fragment or end 658 if (testChar == '?') { 659 index++; 660 start = index; 661 while (index < end) { 662 testChar = p_uriSpec.charAt(index); 663 if (testChar == '#') { 664 break; 665 } 666 if (testChar == '%') { 667 if (index+2 >= end || 668 !isHex(p_uriSpec.charAt(index+1)) || 669 !isHex(p_uriSpec.charAt(index+2))) { 670 throw new MalformedURIException( 671 "Query string contains invalid escape sequence!"); 672 } 673 } 674 else if (!isReservedCharacter(testChar) && 675 !isUnreservedCharacter(testChar)) { 676 throw new MalformedURIException( 677 "Query string contains invalid character:" + testChar); 678 } 679 index++; 680 } 681 m_queryString = p_uriSpec.substring(start, index); 682 } 683 684 // fragment - starts with # 685 if (testChar == '#') { 686 index++; 687 start = index; 688 while (index < end) { 689 testChar = p_uriSpec.charAt(index); 690 691 if (testChar == '%') { 692 if (index+2 >= end || 693 !isHex(p_uriSpec.charAt(index+1)) || 694 !isHex(p_uriSpec.charAt(index+2))) { 695 throw new MalformedURIException( 696 "Fragment contains invalid escape sequence!"); 697 } 698 } 699 else if (!isReservedCharacter(testChar) && 700 !isUnreservedCharacter(testChar)) { 701 throw new MalformedURIException( 702 "Fragment contains invalid character:"+testChar); 703 } 704 index++; 705 } 706 m_fragment = p_uriSpec.substring(start, index); 707 } 708 } 709 710 /** 711 * Get the scheme for this URI. 712 * 713 * @return the scheme for this URI 714 */ 715 public String getScheme() { 716 return m_scheme; 717 } 718 719 /** 720 * Get the scheme-specific part for this URI (everything following the 721 * scheme and the first colon). See RFC 2396 Section 5.2 for spec. 722 * 723 * @return the scheme-specific part for this URI 724 */ 725 public String getSchemeSpecificPart() { 726 StringBuilder schemespec = new StringBuilder(); 727 728 if (m_userinfo != null || m_host != null || m_port != -1) { 729 schemespec.append("//"); 730 } 731 732 if (m_userinfo != null) { 733 schemespec.append(m_userinfo); 734 schemespec.append('@'); 735 } 736 737 if (m_host != null) { 738 schemespec.append(m_host); 739 } 740 741 if (m_port != -1) { 742 schemespec.append(':'); 743 schemespec.append(m_port); 744 } 745 746 if (m_path != null) { 747 schemespec.append((m_path)); 748 } 749 750 if (m_queryString != null) { 751 schemespec.append('?'); 752 schemespec.append(m_queryString); 753 } 754 755 if (m_fragment != null) { 756 schemespec.append('#'); 757 schemespec.append(m_fragment); 758 } 759 760 return schemespec.toString(); 761 } 762 763 /** 764 * Get the userinfo for this URI. 765 * 766 * @return the userinfo for this URI (null if not specified). 767 */ 768 public String getUserinfo() { 769 return m_userinfo; 770 } 771 772 /** 773 * Get the host for this URI. 774 * 775 * @return the host for this URI (null if not specified). 776 */ 777 public String getHost() { 778 return m_host; 779 } 780 781 /** 782 * Get the port for this URI. 783 * 784 * @return the port for this URI (-1 if not specified). 785 */ 786 public int getPort() { 787 return m_port; 788 } 789 790 /** 791 * Get the path for this URI (optionally with the query string and 792 * fragment). 793 * 794 * @param p_includeQueryString if true (and query string is not null), 795 * then a "?" followed by the query string 796 * will be appended 797 * @param p_includeFragment if true (and fragment is not null), 798 * then a "#" followed by the fragment 799 * will be appended 800 * 801 * @return the path for this URI possibly including the query string 802 * and fragment 803 */ 804 public String getPath(boolean p_includeQueryString, 805 boolean p_includeFragment) { 806 StringBuilder pathString = new StringBuilder(m_path); 807 808 if (p_includeQueryString && m_queryString != null) { 809 pathString.append('?'); 810 pathString.append(m_queryString); 811 } 812 813 if (p_includeFragment && m_fragment != null) { 814 pathString.append('#'); 815 pathString.append(m_fragment); 816 } 817 return pathString.toString(); 818 } 819 820 /** 821 * Get the path for this URI. Note that the value returned is the path 822 * only and does not include the query string or fragment. 823 * 824 * @return the path for this URI. 825 */ 826 public String getPath() { 827 return m_path; 828 } 829 830 /** 831 * Get the query string for this URI. 832 * 833 * @return the query string for this URI. Null is returned if there 834 * was no "?" in the URI spec, empty string if there was a 835 * "?" but no query string following it. 836 */ 837 public String getQueryString() { 838 return m_queryString; 839 } 840 841 /** 842 * Get the fragment for this URI. 843 * 844 * @return the fragment for this URI. Null is returned if there 845 * was no "#" in the URI spec, empty string if there was a 846 * "#" but no fragment following it. 847 */ 848 public String getFragment() { 849 return m_fragment; 850 } 851 852 /** 853 * Set the scheme for this URI. The scheme is converted to lowercase 854 * before it is set. 855 * 856 * @param p_scheme the scheme for this URI (cannot be null) 857 * 858 * @exception MalformedURIException if p_scheme is not a conformant 859 * scheme name 860 */ 861 public void setScheme(String p_scheme) throws MalformedURIException { 862 if (p_scheme == null) { 863 throw new MalformedURIException( 864 "Cannot set scheme from null string!"); 865 } 866 if (!isConformantSchemeName(p_scheme)) { 867 throw new MalformedURIException("The scheme is not conformant."); 868 } 869 870 m_scheme = p_scheme.toLowerCase(); 871 } 872 873 /** 874 * Set the userinfo for this URI. If a non-null value is passed in and 875 * the host value is null, then an exception is thrown. 876 * 877 * @param p_userinfo the userinfo for this URI 878 * 879 * @exception MalformedURIException if p_userinfo contains invalid 880 * characters 881 */ 882 public void setUserinfo(String p_userinfo) throws MalformedURIException { 883 if (p_userinfo == null) { 884 m_userinfo = null; 885 } 886 else { 887 if (m_host == null) { 888 throw new MalformedURIException( 889 "Userinfo cannot be set when host is null!"); 890 } 891 892 // userinfo can contain alphanumerics, mark characters, escaped 893 // and ';',':','&','=','+','$',',' 894 int index = 0; 895 int end = p_userinfo.length(); 896 char testChar = '\0'; 897 while (index < end) { 898 testChar = p_userinfo.charAt(index); 899 if (testChar == '%') { 900 if (index+2 >= end || 901 !isHex(p_userinfo.charAt(index+1)) || 902 !isHex(p_userinfo.charAt(index+2))) { 903 throw new MalformedURIException( 904 "Userinfo contains invalid escape sequence!"); 905 } 906 } 907 else if (!isUnreservedCharacter(testChar) && 908 USERINFO_CHARACTERS.indexOf(testChar) == -1) { 909 throw new MalformedURIException( 910 "Userinfo contains invalid character:"+testChar); 911 } 912 index++; 913 } 914 } 915 m_userinfo = p_userinfo; 916 } 917 918 /** 919 * Set the host for this URI. If null is passed in, the userinfo 920 * field is also set to null and the port is set to -1. 921 * 922 * @param p_host the host for this URI 923 * 924 * @exception MalformedURIException if p_host is not a valid IP 925 * address or DNS hostname. 926 */ 927 public void setHost(String p_host) throws MalformedURIException { 928 if (p_host == null || p_host.trim().length() == 0) { 929 m_host = p_host; 930 m_userinfo = null; 931 m_port = -1; 932 } 933 else if (!isWellFormedAddress(p_host)) { 934 throw new MalformedURIException("Host is not a well formed address!"); 935 } 936 m_host = p_host; 937 } 938 939 /** 940 * Set the port for this URI. -1 is used to indicate that the port is 941 * not specified, otherwise valid port numbers are between 0 and 65535. 942 * If a valid port number is passed in and the host field is null, 943 * an exception is thrown. 944 * 945 * @param p_port the port number for this URI 946 * 947 * @exception MalformedURIException if p_port is not -1 and not a 948 * valid port number 949 */ 950 public void setPort(int p_port) throws MalformedURIException { 951 if (p_port >= 0 && p_port <= 65535) { 952 if (m_host == null) { 953 throw new MalformedURIException( 954 "Port cannot be set when host is null!"); 955 } 956 } 957 else if (p_port != -1) { 958 throw new MalformedURIException("Invalid port number!"); 959 } 960 m_port = p_port; 961 } 962 963 /** 964 * Set the path for this URI. If the supplied path is null, then the 965 * query string and fragment are set to null as well. If the supplied 966 * path includes a query string and/or fragment, these fields will be 967 * parsed and set as well. Note that, for URIs following the "generic 968 * URI" syntax, the path specified should start with a slash. 969 * For URIs that do not follow the generic URI syntax, this method 970 * sets the scheme-specific part. 971 * 972 * @param p_path the path for this URI (may be null) 973 * 974 * @exception MalformedURIException if p_path contains invalid 975 * characters 976 */ 977 public void setPath(String p_path) throws MalformedURIException { 978 if (p_path == null) { 979 m_path = null; 980 m_queryString = null; 981 m_fragment = null; 982 } 983 else { 984 initializePath(p_path); 985 } 986 } 987 988 /** 989 * Append to the end of the path of this URI. If the current path does 990 * not end in a slash and the path to be appended does not begin with 991 * a slash, a slash will be appended to the current path before the 992 * new segment is added. Also, if the current path ends in a slash 993 * and the new segment begins with a slash, the extra slash will be 994 * removed before the new segment is appended. 995 * 996 * @param p_addToPath the new segment to be added to the current path 997 * 998 * @exception MalformedURIException if p_addToPath contains syntax 999 * errors 1000 */ 1001 public void appendPath(String p_addToPath) 1002 throws MalformedURIException { 1003 if (p_addToPath == null || p_addToPath.trim().length() == 0) { 1004 return; 1005 } 1006 1007 if (!isURIString(p_addToPath)) { 1008 throw new MalformedURIException( 1009 "Path contains invalid character!"); 1010 } 1011 1012 if (m_path == null || m_path.trim().length() == 0) { 1013 if (p_addToPath.startsWith("/")) { 1014 m_path = p_addToPath; 1015 } 1016 else { 1017 m_path = "/" + p_addToPath; 1018 } 1019 } 1020 else if (m_path.endsWith("/")) { 1021 if (p_addToPath.startsWith("/")) { 1022 m_path = m_path.concat(p_addToPath.substring(1)); 1023 } 1024 else { 1025 m_path = m_path.concat(p_addToPath); 1026 } 1027 } 1028 else { 1029 if (p_addToPath.startsWith("/")) { 1030 m_path = m_path.concat(p_addToPath); 1031 } 1032 else { 1033 m_path = m_path.concat("/" + p_addToPath); 1034 } 1035 } 1036 } 1037 1038 /** 1039 * Set the query string for this URI. A non-null value is valid only 1040 * if this is an URI conforming to the generic URI syntax and 1041 * the path value is not null. 1042 * 1043 * @param p_queryString the query string for this URI 1044 * 1045 * @exception MalformedURIException if p_queryString is not null and this 1046 * URI does not conform to the generic 1047 * URI syntax or if the path is null 1048 */ 1049 public void setQueryString(String p_queryString) throws MalformedURIException { 1050 if (p_queryString == null) { 1051 m_queryString = null; 1052 } 1053 else if (!isGenericURI()) { 1054 throw new MalformedURIException( 1055 "Query string can only be set for a generic URI!"); 1056 } 1057 else if (getPath() == null) { 1058 throw new MalformedURIException( 1059 "Query string cannot be set when path is null!"); 1060 } 1061 else if (!isURIString(p_queryString)) { 1062 throw new MalformedURIException( 1063 "Query string contains invalid character!"); 1064 } 1065 else { 1066 m_queryString = p_queryString; 1067 } 1068 } 1069 1070 /** 1071 * Set the fragment for this URI. A non-null value is valid only 1072 * if this is a URI conforming to the generic URI syntax and 1073 * the path value is not null. 1074 * 1075 * @param p_fragment the fragment for this URI 1076 * 1077 * @exception MalformedURIException if p_fragment is not null and this 1078 * URI does not conform to the generic 1079 * URI syntax or if the path is null 1080 */ 1081 public void setFragment(String p_fragment) throws MalformedURIException { 1082 if (p_fragment == null) { 1083 m_fragment = null; 1084 } 1085 else if (!isGenericURI()) { 1086 throw new MalformedURIException( 1087 "Fragment can only be set for a generic URI!"); 1088 } 1089 else if (getPath() == null) { 1090 throw new MalformedURIException( 1091 "Fragment cannot be set when path is null!"); 1092 } 1093 else if (!isURIString(p_fragment)) { 1094 throw new MalformedURIException( 1095 "Fragment contains invalid character!"); 1096 } 1097 else { 1098 m_fragment = p_fragment; 1099 } 1100 } 1101 1102 /** 1103 * Determines if the passed-in Object is equivalent to this URI. 1104 * 1105 * @param p_test the Object to test for equality. 1106 * 1107 * @return true if p_test is a URI with all values equal to this 1108 * URI, false otherwise 1109 */ 1110 public boolean equals(Object p_test) { 1111 if (p_test instanceof JaxmURI) { 1112 JaxmURI testURI = (JaxmURI) p_test; 1113 if (((m_scheme == null && testURI.m_scheme == null) || 1114 (m_scheme != null && testURI.m_scheme != null && 1115 m_scheme.equals(testURI.m_scheme))) && 1116 ((m_userinfo == null && testURI.m_userinfo == null) || 1117 (m_userinfo != null && testURI.m_userinfo != null && 1118 m_userinfo.equals(testURI.m_userinfo))) && 1119 ((m_host == null && testURI.m_host == null) || 1120 (m_host != null && testURI.m_host != null && 1121 m_host.equals(testURI.m_host))) && 1122 m_port == testURI.m_port && 1123 ((m_path == null && testURI.m_path == null) || 1124 (m_path != null && testURI.m_path != null && 1125 m_path.equals(testURI.m_path))) && 1126 ((m_queryString == null && testURI.m_queryString == null) || 1127 (m_queryString != null && testURI.m_queryString != null && 1128 m_queryString.equals(testURI.m_queryString))) && 1129 ((m_fragment == null && testURI.m_fragment == null) || 1130 (m_fragment != null && testURI.m_fragment != null && 1131 m_fragment.equals(testURI.m_fragment)))) { 1132 return true; 1133 } 1134 } 1135 return false; 1136 } 1137 1138 public int hashCode() { 1139 // No members safe to use, just default to a constant. 1140 return 153214; 1141 } 1142 1143 /** 1144 * Get the URI as a string specification. See RFC 2396 Section 5.2. 1145 * 1146 * @return the URI string specification 1147 */ 1148 public String toString() { 1149 StringBuilder uriSpecString = new StringBuilder(); 1150 1151 if (m_scheme != null) { 1152 uriSpecString.append(m_scheme); 1153 uriSpecString.append(':'); 1154 } 1155 uriSpecString.append(getSchemeSpecificPart()); 1156 return uriSpecString.toString(); 1157 } 1158 1159 /** 1160 * Get the indicator as to whether this URI uses the "generic URI" 1161 * syntax. 1162 * 1163 * @return true if this URI uses the "generic URI" syntax, false 1164 * otherwise 1165 */ 1166 public boolean isGenericURI() { 1167 // presence of the host (whether valid or empty) means 1168 // double-slashes which means generic uri 1169 return (m_host != null); 1170 } 1171 1172 /** 1173 * Determine whether a scheme conforms to the rules for a scheme name. 1174 * A scheme is conformant if it starts with an alphanumeric, and 1175 * contains only alphanumerics, '+','-' and '.'. 1176 * 1177 * @return true if the scheme is conformant, false otherwise 1178 */ 1179 public static boolean isConformantSchemeName(String p_scheme) { 1180 if (p_scheme == null || p_scheme.trim().length() == 0) { 1181 return false; 1182 } 1183 1184 if (!isAlpha(p_scheme.charAt(0))) { 1185 return false; 1186 } 1187 1188 char testChar; 1189 for (int i = 1; i < p_scheme.length(); i++) { 1190 testChar = p_scheme.charAt(i); 1191 if (!isAlphanum(testChar) && 1192 SCHEME_CHARACTERS.indexOf(testChar) == -1) { 1193 return false; 1194 } 1195 } 1196 1197 return true; 1198 } 1199 1200 /** 1201 * Determine whether a string is syntactically capable of representing 1202 * a valid IPv4 address or the domain name of a network host. A valid 1203 * IPv4 address consists of four decimal digit groups separated by a 1204 * '.'. A hostname consists of domain labels (each of which must 1205 * begin and end with an alphanumeric but may contain '-') separated 1206 & by a '.'. See RFC 2396 Section 3.2.2. 1207 * 1208 * @return true if the string is a syntactically valid IPv4 address 1209 * or hostname 1210 */ 1211 public static boolean isWellFormedAddress(String p_address) { 1212 if (p_address == null) { 1213 return false; 1214 } 1215 1216 String address = p_address.trim(); 1217 int addrLength = address.length(); 1218 if (addrLength == 0 || addrLength > 255) { 1219 return false; 1220 } 1221 1222 if (address.startsWith(".") || address.startsWith("-")) { 1223 return false; 1224 } 1225 1226 // rightmost domain label starting with digit indicates IP address 1227 // since top level domain label can only start with an alpha 1228 // see RFC 2396 Section 3.2.2 1229 int index = address.lastIndexOf('.'); 1230 if (address.endsWith(".")) { 1231 index = address.substring(0, index).lastIndexOf('.'); 1232 } 1233 1234 if (index+1 < addrLength && isDigit(p_address.charAt(index+1))) { 1235 char testChar; 1236 int numDots = 0; 1237 1238 // make sure that 1) we see only digits and dot separators, 2) that 1239 // any dot separator is preceded and followed by a digit and 1240 // 3) that we find 3 dots 1241 for (int i = 0; i < addrLength; i++) { 1242 testChar = address.charAt(i); 1243 if (testChar == '.') { 1244 if (!isDigit(address.charAt(i-1)) || 1245 (i+1 < addrLength && !isDigit(address.charAt(i+1)))) { 1246 return false; 1247 } 1248 numDots++; 1249 } 1250 else if (!isDigit(testChar)) { 1251 return false; 1252 } 1253 } 1254 if (numDots != 3) { 1255 return false; 1256 } 1257 } 1258 else { 1259 // domain labels can contain alphanumerics and '-" 1260 // but must start and end with an alphanumeric 1261 char testChar; 1262 1263 for (int i = 0; i < addrLength; i++) { 1264 testChar = address.charAt(i); 1265 if (testChar == '.') { 1266 if (!isAlphanum(address.charAt(i-1))) { 1267 return false; 1268 } 1269 if (i+1 < addrLength && !isAlphanum(address.charAt(i+1))) { 1270 return false; 1271 } 1272 } 1273 else if (!isAlphanum(testChar) && testChar != '-') { 1274 return false; 1275 } 1276 } 1277 } 1278 return true; 1279 } 1280 1281 1282 /** 1283 * Determine whether a char is a digit. 1284 * 1285 * @return true if the char is betweeen '0' and '9', false otherwise 1286 */ 1287 private static boolean isDigit(char p_char) { 1288 return p_char >= '0' && p_char <= '9'; 1289 } 1290 1291 /** 1292 * Determine whether a character is a hexadecimal character. 1293 * 1294 * @return true if the char is betweeen '0' and '9', 'a' and 'f' 1295 * or 'A' and 'F', false otherwise 1296 */ 1297 private static boolean isHex(char p_char) { 1298 return (isDigit(p_char) || 1299 (p_char >= 'a' && p_char <= 'f') || 1300 (p_char >= 'A' && p_char <= 'F')); 1301 } 1302 1303 /** 1304 * Determine whether a char is an alphabetic character: a-z or A-Z 1305 * 1306 * @return true if the char is alphabetic, false otherwise 1307 */ 1308 private static boolean isAlpha(char p_char) { 1309 return ((p_char >= 'a' && p_char <= 'z') || 1310 (p_char >= 'A' && p_char <= 'Z' )); 1311 } 1312 1313 /** 1314 * Determine whether a char is an alphanumeric: 0-9, a-z or A-Z 1315 * 1316 * @return true if the char is alphanumeric, false otherwise 1317 */ 1318 private static boolean isAlphanum(char p_char) { 1319 return (isAlpha(p_char) || isDigit(p_char)); 1320 } 1321 1322 /** 1323 * Determine whether a character is a reserved character: 1324 * ';', '/', '?', ':', '@', '&', '=', '+', '$' or ',' 1325 * 1326 * @return true if the string contains any reserved characters 1327 */ 1328 private static boolean isReservedCharacter(char p_char) { 1329 return RESERVED_CHARACTERS.indexOf(p_char) != -1; 1330 } 1331 1332 /** 1333 * Determine whether a char is an unreserved character. 1334 * 1335 * @return true if the char is unreserved, false otherwise 1336 */ 1337 private static boolean isUnreservedCharacter(char p_char) { 1338 return (isAlphanum(p_char) || 1339 MARK_CHARACTERS.indexOf(p_char) != -1); 1340 } 1341 1342 /** 1343 * Determine whether a given string contains only URI characters (also 1344 * called "uric" in RFC 2396). uric consist of all reserved 1345 * characters, unreserved characters and escaped characters. 1346 * 1347 * @return true if the string is comprised of uric, false otherwise 1348 */ 1349 private static boolean isURIString(String p_uric) { 1350 if (p_uric == null) { 1351 return false; 1352 } 1353 int end = p_uric.length(); 1354 char testChar = '\0'; 1355 for (int i = 0; i < end; i++) { 1356 testChar = p_uric.charAt(i); 1357 if (testChar == '%') { 1358 if (i+2 >= end || 1359 !isHex(p_uric.charAt(i+1)) || 1360 !isHex(p_uric.charAt(i+2))) { 1361 return false; 1362 } 1363 else { 1364 i += 2; 1365 continue; 1366 } 1367 } 1368 if (isReservedCharacter(testChar) || 1369 isUnreservedCharacter(testChar)) { 1370 continue; 1371 } 1372 else { 1373 return false; 1374 } 1375 } 1376 return true; 1377 } 1378 }