1 /* 2 * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package com.sun.xml.internal.messaging.saaj.util; 27 28 // Imported from: org.apache.xerces.util 29 // Needed to work around differences in JDK1.2 and 1.3 and deal with userInfo 30 31 import java.io.IOException; 32 import java.io.Serializable; 33 34 35 /********************************************************************** 36 * A class to represent a Uniform Resource Identifier (URI). This class 37 * is designed to handle the parsing of URIs and provide access to 38 * the various components (scheme, host, port, userinfo, path, query 39 * string and fragment) that may constitute a URI. 40 * <p> 41 * Parsing of a URI specification is done according to the URI 42 * syntax described in <a href="http://www.ietf.org/rfc/rfc2396.txt?number=2396">RFC 2396</a>. 43 * Every URI consists of a scheme, followed by a colon (':'), followed by a scheme-specific 44 * part. For URIs that follow the "generic URI" syntax, the scheme- 45 * specific part begins with two slashes ("//") and may be followed 46 * by an authority segment (comprised of user information, host, and 47 * port), path segment, query segment and fragment. Note that RFC 2396 48 * no longer specifies the use of the parameters segment and excludes 49 * the "user:password" syntax as part of the authority segment. If 50 * "user:password" appears in a URI, the entire user/password string 51 * is stored as userinfo. 52 * <p> 53 * For URIs that do not follow the "generic URI" syntax (e.g. mailto), 54 * the entire scheme-specific part is treated as the "path" portion 55 * of the URI. 56 * <p> 57 * Note that, unlike the java.net.URL class, this class does not provide 58 * any built-in network access functionality nor does it provide any 59 * scheme-specific functionality (for example, it does not know a 60 * default port for a specific scheme). Rather, it only knows the 61 * grammar and basic set of operations that can be applied to a URI. 62 * 63 * @version 64 * 65 **********************************************************************/ 66 public class JaxmURI implements Serializable { 67 68 /******************************************************************* 69 * MalformedURIExceptions are thrown in the process of building a URI 70 * or setting fields on a URI when an operation would result in an 71 * invalid URI specification. 72 * 73 ********************************************************************/ 74 public static class MalformedURIException extends IOException { 75 76 /****************************************************************** 77 * Constructs a <code>MalformedURIException</code> with no specified 78 * detail message. 79 ******************************************************************/ 80 public MalformedURIException() { 81 super(); 82 } 83 84 /***************************************************************** 85 * Constructs a <code>MalformedURIException</code> with the 86 * specified detail message. 87 * 88 * @param p_msg the detail message. 89 ******************************************************************/ 90 public MalformedURIException(String p_msg) { 91 super(p_msg); 92 } 93 } 94 95 /** reserved characters */ 96 private static final String RESERVED_CHARACTERS = ";/?:@&=+$,"; 97 98 /** URI punctuation mark characters - these, combined with 99 alphanumerics, constitute the "unreserved" characters */ 100 private static final String MARK_CHARACTERS = "-_.!~*'() "; 101 102 /** scheme can be composed of alphanumerics and these characters */ 103 private static final String SCHEME_CHARACTERS = "+-."; 104 105 /** userinfo can be composed of unreserved, escaped and these 106 characters */ 107 private static final String USERINFO_CHARACTERS = ";:&=+$,"; 108 109 /** Stores the scheme (usually the protocol) for this URI. */ 110 private String m_scheme = null; 111 112 /** If specified, stores the userinfo for this URI; otherwise null */ 113 private String m_userinfo = null; 114 115 /** If specified, stores the host for this URI; otherwise null */ 116 private String m_host = null; 117 118 /** If specified, stores the port for this URI; otherwise -1 */ 119 private int m_port = -1; 120 121 /** If specified, stores the path for this URI; otherwise null */ 122 private String m_path = null; 123 124 /** If specified, stores the query string for this URI; otherwise 125 null. */ 126 private String m_queryString = null; 127 128 /** If specified, stores the fragment for this URI; otherwise null */ 129 private String m_fragment = null; 130 131 /** 132 * Construct a new and uninitialized URI. 133 */ 134 public JaxmURI() { 135 } 136 137 /** 138 * Construct a new URI from another URI. All fields for this URI are 139 * set equal to the fields of the URI passed in. 140 * 141 * @param p_other the URI to copy (cannot be null) 142 */ 143 public JaxmURI(JaxmURI p_other) { 144 initialize(p_other); 145 } 146 147 /** 148 * Construct a new URI from a URI specification string. If the 149 * specification follows the "generic URI" syntax, (two slashes 150 * following the first colon), the specification will be parsed 151 * accordingly - setting the scheme, userinfo, host,port, path, query 152 * string and fragment fields as necessary. If the specification does 153 * not follow the "generic URI" syntax, the specification is parsed 154 * into a scheme and scheme-specific part (stored as the path) only. 155 * 156 * @param p_uriSpec the URI specification string (cannot be null or 157 * empty) 158 * 159 * @exception MalformedURIException if p_uriSpec violates any syntax 160 * rules 161 */ 162 public JaxmURI(String p_uriSpec) throws MalformedURIException { 163 this((JaxmURI)null, p_uriSpec); 164 } 165 166 /** 167 * Construct a new URI from a base URI and a URI specification string. 168 * The URI specification string may be a relative URI. 169 * 170 * @param p_base the base URI (cannot be null if p_uriSpec is null or 171 * empty) 172 * @param p_uriSpec the URI specification string (cannot be null or 173 * empty if p_base is null) 174 * 175 * @exception MalformedURIException if p_uriSpec violates any syntax 176 * rules 177 */ 178 public JaxmURI(JaxmURI p_base, String p_uriSpec) throws MalformedURIException { 179 initialize(p_base, p_uriSpec); 180 } 181 182 /** 183 * Construct a new URI that does not follow the generic URI syntax. 184 * Only the scheme and scheme-specific part (stored as the path) are 185 * initialized. 186 * 187 * @param p_scheme the URI scheme (cannot be null or empty) 188 * @param p_schemeSpecificPart the scheme-specific part (cannot be 189 * null or empty) 190 * 191 * @exception MalformedURIException if p_scheme violates any 192 * syntax rules 193 */ 194 public JaxmURI(String p_scheme, String p_schemeSpecificPart) 195 throws MalformedURIException { 196 if (p_scheme == null || p_scheme.trim().length() == 0) { 197 throw new MalformedURIException( 198 "Cannot construct URI with null/empty scheme!"); 199 } 200 if (p_schemeSpecificPart == null || 201 p_schemeSpecificPart.trim().length() == 0) { 202 throw new MalformedURIException( 203 "Cannot construct URI with null/empty scheme-specific part!"); 204 } 205 setScheme(p_scheme); 206 setPath(p_schemeSpecificPart); 207 } 208 209 /** 210 * Construct a new URI that follows the generic URI syntax from its 211 * component parts. Each component is validated for syntax and some 212 * basic semantic checks are performed as well. See the individual 213 * setter methods for specifics. 214 * 215 * @param p_scheme the URI scheme (cannot be null or empty) 216 * @param p_host the hostname or IPv4 address for the URI 217 * @param p_path the URI path - if the path contains '?' or '#', 218 * then the query string and/or fragment will be 219 * set from the path; however, if the query and 220 * fragment are specified both in the path and as 221 * separate parameters, an exception is thrown 222 * @param p_queryString the URI query string (cannot be specified 223 * if path is null) 224 * @param p_fragment the URI fragment (cannot be specified if path 225 * is null) 226 * 227 * @exception MalformedURIException if any of the parameters violates 228 * syntax rules or semantic rules 229 */ 230 public JaxmURI(String p_scheme, String p_host, String p_path, 231 String p_queryString, String p_fragment) 232 throws MalformedURIException { 233 this(p_scheme, null, p_host, -1, p_path, p_queryString, p_fragment); 234 } 235 236 /** 237 * Construct a new URI that follows the generic URI syntax from its 238 * component parts. Each component is validated for syntax and some 239 * basic semantic checks are performed as well. See the individual 240 * setter methods for specifics. 241 * 242 * @param p_scheme the URI scheme (cannot be null or empty) 243 * @param p_userinfo the URI userinfo (cannot be specified if host 244 * is null) 245 * @param p_host the hostname or IPv4 address for the URI 246 * @param p_port the URI port (may be -1 for "unspecified"; cannot 247 * be specified if host is null) 248 * @param p_path the URI path - if the path contains '?' or '#', 249 * then the query string and/or fragment will be 250 * set from the path; however, if the query and 251 * fragment are specified both in the path and as 252 * separate parameters, an exception is thrown 253 * @param p_queryString the URI query string (cannot be specified 254 * if path is null) 255 * @param p_fragment the URI fragment (cannot be specified if path 256 * is null) 257 * 258 * @exception MalformedURIException if any of the parameters violates 259 * syntax rules or semantic rules 260 */ 261 public JaxmURI(String p_scheme, String p_userinfo, 262 String p_host, int p_port, String p_path, 263 String p_queryString, String p_fragment) 264 throws MalformedURIException { 265 if (p_scheme == null || p_scheme.trim().length() == 0) { 266 throw new MalformedURIException("Scheme is required!"); 267 } 268 269 if (p_host == null) { 270 if (p_userinfo != null) { 271 throw new MalformedURIException( 272 "Userinfo may not be specified if host is not specified!"); 273 } 274 if (p_port != -1) { 275 throw new MalformedURIException( 276 "Port may not be specified if host is not specified!"); 277 } 278 } 279 280 if (p_path != null) { 281 if (p_path.indexOf('?') != -1 && p_queryString != null) { 282 throw new MalformedURIException( 283 "Query string cannot be specified in path and query string!"); 284 } 285 286 if (p_path.indexOf('#') != -1 && p_fragment != null) { 287 throw new MalformedURIException( 288 "Fragment cannot be specified in both the path and fragment!"); 289 } 290 } 291 292 setScheme(p_scheme); 293 setHost(p_host); 294 setPort(p_port); 295 setUserinfo(p_userinfo); 296 setPath(p_path); 297 setQueryString(p_queryString); 298 setFragment(p_fragment); 299 } 300 301 /** 302 * Initialize all fields of this URI from another URI. 303 * 304 * @param p_other the URI to copy (cannot be null) 305 */ 306 private void initialize(JaxmURI p_other) { 307 m_scheme = p_other.getScheme(); 308 m_userinfo = p_other.getUserinfo(); 309 m_host = p_other.getHost(); 310 m_port = p_other.getPort(); 311 m_path = p_other.getPath(); 312 m_queryString = p_other.getQueryString(); 313 m_fragment = p_other.getFragment(); 314 } 315 316 /** 317 * Initializes this URI from a base URI and a URI specification string. 318 * See RFC 2396 Section 4 and Appendix B for specifications on parsing 319 * the URI and Section 5 for specifications on resolving relative URIs 320 * and relative paths. 321 * 322 * @param p_base the base URI (may be null if p_uriSpec is an absolute 323 * URI) 324 * @param p_uriSpec the URI spec string which may be an absolute or 325 * relative URI (can only be null/empty if p_base 326 * is not null) 327 * 328 * @exception MalformedURIException if p_base is null and p_uriSpec 329 * is not an absolute URI or if 330 * p_uriSpec violates syntax rules 331 */ 332 private void initialize(JaxmURI p_base, String p_uriSpec) 333 throws MalformedURIException { 334 if (p_base == null && 335 (p_uriSpec == null || p_uriSpec.trim().length() == 0)) { 336 throw new MalformedURIException( 337 "Cannot initialize URI with empty parameters."); 338 } 339 340 // just make a copy of the base if spec is empty 341 if (p_uriSpec == null || p_uriSpec.trim().length() == 0) { 342 initialize(p_base); 343 return; 344 } 345 346 String uriSpec = p_uriSpec.trim(); 347 int uriSpecLen = uriSpec.length(); 348 int index = 0; 349 350 // Check for scheme, which must be before `/'. Also handle names with 351 // DOS drive letters ('D:'), so 1-character schemes are not allowed. 352 int colonIdx = uriSpec.indexOf(':'); 353 int slashIdx = uriSpec.indexOf('/'); 354 if ((colonIdx < 2) || (colonIdx > slashIdx && slashIdx != -1)) { 355 int fragmentIdx = uriSpec.indexOf('#'); 356 // A standalone base is a valid URI according to spec 357 if (p_base == null && fragmentIdx != 0 ) { 358 throw new MalformedURIException("No scheme found in URI."); 359 } 360 } 361 else { 362 initializeScheme(uriSpec); 363 index = m_scheme.length()+1; 364 } 365 366 // two slashes means generic URI syntax, so we get the authority 367 if (((index+1) < uriSpecLen) && 368 (uriSpec.substring(index).startsWith("//"))) { 369 index += 2; 370 int startPos = index; 371 372 // get authority - everything up to path, query or fragment 373 char testChar = '\0'; 374 while (index < uriSpecLen) { 375 testChar = uriSpec.charAt(index); 376 if (testChar == '/' || testChar == '?' || testChar == '#') { 377 break; 378 } 379 index++; 380 } 381 382 // if we found authority, parse it out, otherwise we set the 383 // host to empty string 384 if (index > startPos) { 385 initializeAuthority(uriSpec.substring(startPos, index)); 386 } 387 else { 388 m_host = ""; 389 } 390 } 391 392 initializePath(uriSpec.substring(index)); 393 394 // Resolve relative URI to base URI - see RFC 2396 Section 5.2 395 // In some cases, it might make more sense to throw an exception 396 // (when scheme is specified is the string spec and the base URI 397 // is also specified, for example), but we're just following the 398 // RFC specifications 399 if (p_base != null) { 400 401 // check to see if this is the current doc - RFC 2396 5.2 #2 402 // note that this is slightly different from the RFC spec in that 403 // we don't include the check for query string being null 404 // - this handles cases where the urispec is just a query 405 // string or a fragment (e.g. "?y" or "#s") - 406 // see <http://www.ics.uci.edu/~fielding/url/test1.html> which 407 // identified this as a bug in the RFC 408 if (m_path.length() == 0 && m_scheme == null && 409 m_host == null) { 410 m_scheme = p_base.getScheme(); 411 m_userinfo = p_base.getUserinfo(); 412 m_host = p_base.getHost(); 413 m_port = p_base.getPort(); 414 m_path = p_base.getPath(); 415 416 if (m_queryString == null) { 417 m_queryString = p_base.getQueryString(); 418 } 419 return; 420 } 421 422 // check for scheme - RFC 2396 5.2 #3 423 // if we found a scheme, it means absolute URI, so we're done 424 if (m_scheme == null) { 425 m_scheme = p_base.getScheme(); 426 } 427 else { 428 return; 429 } 430 431 // check for authority - RFC 2396 5.2 #4 432 // if we found a host, then we've got a network path, so we're done 433 if (m_host == null) { 434 m_userinfo = p_base.getUserinfo(); 435 m_host = p_base.getHost(); 436 m_port = p_base.getPort(); 437 } 438 else { 439 return; 440 } 441 442 // check for absolute path - RFC 2396 5.2 #5 443 if (m_path.length() > 0 && 444 m_path.startsWith("/")) { 445 return; 446 } 447 448 // if we get to this point, we need to resolve relative path 449 // RFC 2396 5.2 #6 450 String path = ""; 451 String basePath = p_base.getPath(); 452 453 // 6a - get all but the last segment of the base URI path 454 if (basePath != null) { 455 int lastSlash = basePath.lastIndexOf('/'); 456 if (lastSlash != -1) { 457 path = basePath.substring(0, lastSlash+1); 458 } 459 } 460 461 // 6b - append the relative URI path 462 path = path.concat(m_path); 463 464 // 6c - remove all "./" where "." is a complete path segment 465 index = -1; 466 while ((index = path.indexOf("/./")) != -1) { 467 path = path.substring(0, index+1).concat(path.substring(index+3)); 468 } 469 470 // 6d - remove "." if path ends with "." as a complete path segment 471 if (path.endsWith("/.")) { 472 path = path.substring(0, path.length()-1); 473 } 474 475 // 6e - remove all "<segment>/../" where "<segment>" is a complete 476 // path segment not equal to ".." 477 index = 1; 478 int segIndex = -1; 479 String tempString = null; 480 481 while ((index = path.indexOf("/../", index)) > 0) { 482 tempString = path.substring(0, path.indexOf("/../")); 483 segIndex = tempString.lastIndexOf('/'); 484 if (segIndex != -1) { 485 if (!tempString.substring(segIndex++).equals("..")) { 486 path = path.substring(0, segIndex).concat(path.substring(index+4)); 487 } 488 else 489 index += 4; 490 } 491 else 492 index += 4; 493 } 494 495 // 6f - remove ending "<segment>/.." where "<segment>" is a 496 // complete path segment 497 if (path.endsWith("/..")) { 498 tempString = path.substring(0, path.length()-3); 499 segIndex = tempString.lastIndexOf('/'); 500 if (segIndex != -1) { 501 path = path.substring(0, segIndex+1); 502 } 503 } 504 m_path = path; 505 } 506 } 507 508 /** 509 * Initialize the scheme for this URI from a URI string spec. 510 * 511 * @param p_uriSpec the URI specification (cannot be null) 512 * 513 * @exception MalformedURIException if URI does not have a conformant 514 * scheme 515 */ 516 private void initializeScheme(String p_uriSpec) 517 throws MalformedURIException { 518 int uriSpecLen = p_uriSpec.length(); 519 int index = 0; 520 String scheme = null; 521 char testChar = '\0'; 522 523 while (index < uriSpecLen) { 524 testChar = p_uriSpec.charAt(index); 525 if (testChar == ':' || testChar == '/' || 526 testChar == '?' || testChar == '#') { 527 break; 528 } 529 index++; 530 } 531 scheme = p_uriSpec.substring(0, index); 532 533 if (scheme.length() == 0) { 534 throw new MalformedURIException("No scheme found in URI."); 535 } 536 else { 537 setScheme(scheme); 538 } 539 } 540 541 /** 542 * Initialize the authority (userinfo, host and port) for this 543 * URI from a URI string spec. 544 * 545 * @param p_uriSpec the URI specification (cannot be null) 546 * 547 * @exception MalformedURIException if p_uriSpec violates syntax rules 548 */ 549 private void initializeAuthority(String p_uriSpec) 550 throws MalformedURIException { 551 int index = 0; 552 int start = 0; 553 int end = p_uriSpec.length(); 554 char testChar = '\0'; 555 String userinfo = null; 556 557 // userinfo is everything up @ 558 if (p_uriSpec.indexOf('@', start) != -1) { 559 while (index < end) { 560 testChar = p_uriSpec.charAt(index); 561 if (testChar == '@') { 562 break; 563 } 564 index++; 565 } 566 userinfo = p_uriSpec.substring(start, index); 567 index++; 568 } 569 570 // host is everything up to ':' 571 String host = null; 572 start = index; 573 while (index < end) { 574 testChar = p_uriSpec.charAt(index); 575 if (testChar == ':') { 576 break; 577 } 578 index++; 579 } 580 host = p_uriSpec.substring(start, index); 581 int port = -1; 582 if (host.length() > 0) { 583 // port 584 if (testChar == ':') { 585 index++; 586 start = index; 587 while (index < end) { 588 index++; 589 } 590 String portStr = p_uriSpec.substring(start, index); 591 if (portStr.length() > 0) { 592 for (int i = 0; i < portStr.length(); i++) { 593 if (!isDigit(portStr.charAt(i))) { 594 throw new MalformedURIException( 595 portStr + 596 " is invalid. Port should only contain digits!"); 597 } 598 } 599 try { 600 port = Integer.parseInt(portStr); 601 } 602 catch (NumberFormatException nfe) { 603 // can't happen 604 } 605 } 606 } 607 } 608 setHost(host); 609 setPort(port); 610 setUserinfo(userinfo); 611 } 612 613 /** 614 * Initialize the path for this URI from a URI string spec. 615 * 616 * @param p_uriSpec the URI specification (cannot be null) 617 * 618 * @exception MalformedURIException if p_uriSpec violates syntax rules 619 */ 620 private void initializePath(String p_uriSpec) 621 throws MalformedURIException { 622 if (p_uriSpec == null) { 623 throw new MalformedURIException( 624 "Cannot initialize path from null string!"); 625 } 626 627 int index = 0; 628 int start = 0; 629 int end = p_uriSpec.length(); 630 char testChar = '\0'; 631 632 // path - everything up to query string or fragment 633 while (index < end) { 634 testChar = p_uriSpec.charAt(index); 635 if (testChar == '?' || testChar == '#') { 636 break; 637 } 638 // check for valid escape sequence 639 if (testChar == '%') { 640 if (index+2 >= end || 641 !isHex(p_uriSpec.charAt(index+1)) || 642 !isHex(p_uriSpec.charAt(index+2))) { 643 throw new MalformedURIException( 644 "Path contains invalid escape sequence!"); 645 } 646 } 647 else if (!isReservedCharacter(testChar) && 648 !isUnreservedCharacter(testChar)) { 649 throw new MalformedURIException( 650 "Path contains invalid character: " + testChar); 651 } 652 index++; 653 } 654 m_path = p_uriSpec.substring(start, index); 655 656 // query - starts with ? and up to fragment or end 657 if (testChar == '?') { 658 index++; 659 start = index; 660 while (index < end) { 661 testChar = p_uriSpec.charAt(index); 662 if (testChar == '#') { 663 break; 664 } 665 if (testChar == '%') { 666 if (index+2 >= end || 667 !isHex(p_uriSpec.charAt(index+1)) || 668 !isHex(p_uriSpec.charAt(index+2))) { 669 throw new MalformedURIException( 670 "Query string contains invalid escape sequence!"); 671 } 672 } 673 else if (!isReservedCharacter(testChar) && 674 !isUnreservedCharacter(testChar)) { 675 throw new MalformedURIException( 676 "Query string contains invalid character:" + testChar); 677 } 678 index++; 679 } 680 m_queryString = p_uriSpec.substring(start, index); 681 } 682 683 // fragment - starts with # 684 if (testChar == '#') { 685 index++; 686 start = index; 687 while (index < end) { 688 testChar = p_uriSpec.charAt(index); 689 690 if (testChar == '%') { 691 if (index+2 >= end || 692 !isHex(p_uriSpec.charAt(index+1)) || 693 !isHex(p_uriSpec.charAt(index+2))) { 694 throw new MalformedURIException( 695 "Fragment contains invalid escape sequence!"); 696 } 697 } 698 else if (!isReservedCharacter(testChar) && 699 !isUnreservedCharacter(testChar)) { 700 throw new MalformedURIException( 701 "Fragment contains invalid character:"+testChar); 702 } 703 index++; 704 } 705 m_fragment = p_uriSpec.substring(start, index); 706 } 707 } 708 709 /** 710 * Get the scheme for this URI. 711 * 712 * @return the scheme for this URI 713 */ 714 public String getScheme() { 715 return m_scheme; 716 } 717 718 /** 719 * Get the scheme-specific part for this URI (everything following the 720 * scheme and the first colon). See RFC 2396 Section 5.2 for spec. 721 * 722 * @return the scheme-specific part for this URI 723 */ 724 public String getSchemeSpecificPart() { 725 StringBuilder schemespec = new StringBuilder(); 726 727 if (m_userinfo != null || m_host != null || m_port != -1) { 728 schemespec.append("//"); 729 } 730 731 if (m_userinfo != null) { 732 schemespec.append(m_userinfo); 733 schemespec.append('@'); 734 } 735 736 if (m_host != null) { 737 schemespec.append(m_host); 738 } 739 740 if (m_port != -1) { 741 schemespec.append(':'); 742 schemespec.append(m_port); 743 } 744 745 if (m_path != null) { 746 schemespec.append((m_path)); 747 } 748 749 if (m_queryString != null) { 750 schemespec.append('?'); 751 schemespec.append(m_queryString); 752 } 753 754 if (m_fragment != null) { 755 schemespec.append('#'); 756 schemespec.append(m_fragment); 757 } 758 759 return schemespec.toString(); 760 } 761 762 /** 763 * Get the userinfo for this URI. 764 * 765 * @return the userinfo for this URI (null if not specified). 766 */ 767 public String getUserinfo() { 768 return m_userinfo; 769 } 770 771 /** 772 * Get the host for this URI. 773 * 774 * @return the host for this URI (null if not specified). 775 */ 776 public String getHost() { 777 return m_host; 778 } 779 780 /** 781 * Get the port for this URI. 782 * 783 * @return the port for this URI (-1 if not specified). 784 */ 785 public int getPort() { 786 return m_port; 787 } 788 789 /** 790 * Get the path for this URI (optionally with the query string and 791 * fragment). 792 * 793 * @param p_includeQueryString if true (and query string is not null), 794 * then a "?" followed by the query string 795 * will be appended 796 * @param p_includeFragment if true (and fragment is not null), 797 * then a "#" followed by the fragment 798 * will be appended 799 * 800 * @return the path for this URI possibly including the query string 801 * and fragment 802 */ 803 public String getPath(boolean p_includeQueryString, 804 boolean p_includeFragment) { 805 StringBuilder pathString = new StringBuilder(m_path); 806 807 if (p_includeQueryString && m_queryString != null) { 808 pathString.append('?'); 809 pathString.append(m_queryString); 810 } 811 812 if (p_includeFragment && m_fragment != null) { 813 pathString.append('#'); 814 pathString.append(m_fragment); 815 } 816 return pathString.toString(); 817 } 818 819 /** 820 * Get the path for this URI. Note that the value returned is the path 821 * only and does not include the query string or fragment. 822 * 823 * @return the path for this URI. 824 */ 825 public String getPath() { 826 return m_path; 827 } 828 829 /** 830 * Get the query string for this URI. 831 * 832 * @return the query string for this URI. Null is returned if there 833 * was no "?" in the URI spec, empty string if there was a 834 * "?" but no query string following it. 835 */ 836 public String getQueryString() { 837 return m_queryString; 838 } 839 840 /** 841 * Get the fragment for this URI. 842 * 843 * @return the fragment for this URI. Null is returned if there 844 * was no "#" in the URI spec, empty string if there was a 845 * "#" but no fragment following it. 846 */ 847 public String getFragment() { 848 return m_fragment; 849 } 850 851 /** 852 * Set the scheme for this URI. The scheme is converted to lowercase 853 * before it is set. 854 * 855 * @param p_scheme the scheme for this URI (cannot be null) 856 * 857 * @exception MalformedURIException if p_scheme is not a conformant 858 * scheme name 859 */ 860 public void setScheme(String p_scheme) throws MalformedURIException { 861 if (p_scheme == null) { 862 throw new MalformedURIException( 863 "Cannot set scheme from null string!"); 864 } 865 if (!isConformantSchemeName(p_scheme)) { 866 throw new MalformedURIException("The scheme is not conformant."); 867 } 868 869 m_scheme = p_scheme.toLowerCase(); 870 } 871 872 /** 873 * Set the userinfo for this URI. If a non-null value is passed in and 874 * the host value is null, then an exception is thrown. 875 * 876 * @param p_userinfo the userinfo for this URI 877 * 878 * @exception MalformedURIException if p_userinfo contains invalid 879 * characters 880 */ 881 public void setUserinfo(String p_userinfo) throws MalformedURIException { 882 if (p_userinfo == null) { 883 m_userinfo = null; 884 } 885 else { 886 if (m_host == null) { 887 throw new MalformedURIException( 888 "Userinfo cannot be set when host is null!"); 889 } 890 891 // userinfo can contain alphanumerics, mark characters, escaped 892 // and ';',':','&','=','+','$',',' 893 int index = 0; 894 int end = p_userinfo.length(); 895 char testChar = '\0'; 896 while (index < end) { 897 testChar = p_userinfo.charAt(index); 898 if (testChar == '%') { 899 if (index+2 >= end || 900 !isHex(p_userinfo.charAt(index+1)) || 901 !isHex(p_userinfo.charAt(index+2))) { 902 throw new MalformedURIException( 903 "Userinfo contains invalid escape sequence!"); 904 } 905 } 906 else if (!isUnreservedCharacter(testChar) && 907 USERINFO_CHARACTERS.indexOf(testChar) == -1) { 908 throw new MalformedURIException( 909 "Userinfo contains invalid character:"+testChar); 910 } 911 index++; 912 } 913 } 914 m_userinfo = p_userinfo; 915 } 916 917 /** 918 * Set the host for this URI. If null is passed in, the userinfo 919 * field is also set to null and the port is set to -1. 920 * 921 * @param p_host the host for this URI 922 * 923 * @exception MalformedURIException if p_host is not a valid IP 924 * address or DNS hostname. 925 */ 926 public void setHost(String p_host) throws MalformedURIException { 927 if (p_host == null || p_host.trim().length() == 0) { 928 m_host = p_host; 929 m_userinfo = null; 930 m_port = -1; 931 } 932 else if (!isWellFormedAddress(p_host)) { 933 throw new MalformedURIException("Host is not a well formed address!"); 934 } 935 m_host = p_host; 936 } 937 938 /** 939 * Set the port for this URI. -1 is used to indicate that the port is 940 * not specified, otherwise valid port numbers are between 0 and 65535. 941 * If a valid port number is passed in and the host field is null, 942 * an exception is thrown. 943 * 944 * @param p_port the port number for this URI 945 * 946 * @exception MalformedURIException if p_port is not -1 and not a 947 * valid port number 948 */ 949 public void setPort(int p_port) throws MalformedURIException { 950 if (p_port >= 0 && p_port <= 65535) { 951 if (m_host == null) { 952 throw new MalformedURIException( 953 "Port cannot be set when host is null!"); 954 } 955 } 956 else if (p_port != -1) { 957 throw new MalformedURIException("Invalid port number!"); 958 } 959 m_port = p_port; 960 } 961 962 /** 963 * Set the path for this URI. If the supplied path is null, then the 964 * query string and fragment are set to null as well. If the supplied 965 * path includes a query string and/or fragment, these fields will be 966 * parsed and set as well. Note that, for URIs following the "generic 967 * URI" syntax, the path specified should start with a slash. 968 * For URIs that do not follow the generic URI syntax, this method 969 * sets the scheme-specific part. 970 * 971 * @param p_path the path for this URI (may be null) 972 * 973 * @exception MalformedURIException if p_path contains invalid 974 * characters 975 */ 976 public void setPath(String p_path) throws MalformedURIException { 977 if (p_path == null) { 978 m_path = null; 979 m_queryString = null; 980 m_fragment = null; 981 } 982 else { 983 initializePath(p_path); 984 } 985 } 986 987 /** 988 * Append to the end of the path of this URI. If the current path does 989 * not end in a slash and the path to be appended does not begin with 990 * a slash, a slash will be appended to the current path before the 991 * new segment is added. Also, if the current path ends in a slash 992 * and the new segment begins with a slash, the extra slash will be 993 * removed before the new segment is appended. 994 * 995 * @param p_addToPath the new segment to be added to the current path 996 * 997 * @exception MalformedURIException if p_addToPath contains syntax 998 * errors 999 */ 1000 public void appendPath(String p_addToPath) 1001 throws MalformedURIException { 1002 if (p_addToPath == null || p_addToPath.trim().length() == 0) { 1003 return; 1004 } 1005 1006 if (!isURIString(p_addToPath)) { 1007 throw new MalformedURIException( 1008 "Path contains invalid character!"); 1009 } 1010 1011 if (m_path == null || m_path.trim().length() == 0) { 1012 if (p_addToPath.startsWith("/")) { 1013 m_path = p_addToPath; 1014 } 1015 else { 1016 m_path = "/" + p_addToPath; 1017 } 1018 } 1019 else if (m_path.endsWith("/")) { 1020 if (p_addToPath.startsWith("/")) { 1021 m_path = m_path.concat(p_addToPath.substring(1)); 1022 } 1023 else { 1024 m_path = m_path.concat(p_addToPath); 1025 } 1026 } 1027 else { 1028 if (p_addToPath.startsWith("/")) { 1029 m_path = m_path.concat(p_addToPath); 1030 } 1031 else { 1032 m_path = m_path.concat("/" + p_addToPath); 1033 } 1034 } 1035 } 1036 1037 /** 1038 * Set the query string for this URI. A non-null value is valid only 1039 * if this is an URI conforming to the generic URI syntax and 1040 * the path value is not null. 1041 * 1042 * @param p_queryString the query string for this URI 1043 * 1044 * @exception MalformedURIException if p_queryString is not null and this 1045 * URI does not conform to the generic 1046 * URI syntax or if the path is null 1047 */ 1048 public void setQueryString(String p_queryString) throws MalformedURIException { 1049 if (p_queryString == null) { 1050 m_queryString = null; 1051 } 1052 else if (!isGenericURI()) { 1053 throw new MalformedURIException( 1054 "Query string can only be set for a generic URI!"); 1055 } 1056 else if (getPath() == null) { 1057 throw new MalformedURIException( 1058 "Query string cannot be set when path is null!"); 1059 } 1060 else if (!isURIString(p_queryString)) { 1061 throw new MalformedURIException( 1062 "Query string contains invalid character!"); 1063 } 1064 else { 1065 m_queryString = p_queryString; 1066 } 1067 } 1068 1069 /** 1070 * Set the fragment for this URI. A non-null value is valid only 1071 * if this is a URI conforming to the generic URI syntax and 1072 * the path value is not null. 1073 * 1074 * @param p_fragment the fragment for this URI 1075 * 1076 * @exception MalformedURIException if p_fragment is not null and this 1077 * URI does not conform to the generic 1078 * URI syntax or if the path is null 1079 */ 1080 public void setFragment(String p_fragment) throws MalformedURIException { 1081 if (p_fragment == null) { 1082 m_fragment = null; 1083 } 1084 else if (!isGenericURI()) { 1085 throw new MalformedURIException( 1086 "Fragment can only be set for a generic URI!"); 1087 } 1088 else if (getPath() == null) { 1089 throw new MalformedURIException( 1090 "Fragment cannot be set when path is null!"); 1091 } 1092 else if (!isURIString(p_fragment)) { 1093 throw new MalformedURIException( 1094 "Fragment contains invalid character!"); 1095 } 1096 else { 1097 m_fragment = p_fragment; 1098 } 1099 } 1100 1101 /** 1102 * Determines if the passed-in Object is equivalent to this URI. 1103 * 1104 * @param p_test the Object to test for equality. 1105 * 1106 * @return true if p_test is a URI with all values equal to this 1107 * URI, false otherwise 1108 */ 1109 public boolean equals(Object p_test) { 1110 if (p_test instanceof JaxmURI) { 1111 JaxmURI testURI = (JaxmURI) p_test; 1112 if (((m_scheme == null && testURI.m_scheme == null) || 1113 (m_scheme != null && testURI.m_scheme != null && 1114 m_scheme.equals(testURI.m_scheme))) && 1115 ((m_userinfo == null && testURI.m_userinfo == null) || 1116 (m_userinfo != null && testURI.m_userinfo != null && 1117 m_userinfo.equals(testURI.m_userinfo))) && 1118 ((m_host == null && testURI.m_host == null) || 1119 (m_host != null && testURI.m_host != null && 1120 m_host.equals(testURI.m_host))) && 1121 m_port == testURI.m_port && 1122 ((m_path == null && testURI.m_path == null) || 1123 (m_path != null && testURI.m_path != null && 1124 m_path.equals(testURI.m_path))) && 1125 ((m_queryString == null && testURI.m_queryString == null) || 1126 (m_queryString != null && testURI.m_queryString != null && 1127 m_queryString.equals(testURI.m_queryString))) && 1128 ((m_fragment == null && testURI.m_fragment == null) || 1129 (m_fragment != null && testURI.m_fragment != null && 1130 m_fragment.equals(testURI.m_fragment)))) { 1131 return true; 1132 } 1133 } 1134 return false; 1135 } 1136 1137 public int hashCode() { 1138 // No members safe to use, just default to a constant. 1139 return 153214; 1140 } 1141 1142 /** 1143 * Get the URI as a string specification. See RFC 2396 Section 5.2. 1144 * 1145 * @return the URI string specification 1146 */ 1147 public String toString() { 1148 StringBuilder uriSpecString = new StringBuilder(); 1149 1150 if (m_scheme != null) { 1151 uriSpecString.append(m_scheme); 1152 uriSpecString.append(':'); 1153 } 1154 uriSpecString.append(getSchemeSpecificPart()); 1155 return uriSpecString.toString(); 1156 } 1157 1158 /** 1159 * Get the indicator as to whether this URI uses the "generic URI" 1160 * syntax. 1161 * 1162 * @return true if this URI uses the "generic URI" syntax, false 1163 * otherwise 1164 */ 1165 public boolean isGenericURI() { 1166 // presence of the host (whether valid or empty) means 1167 // double-slashes which means generic uri 1168 return (m_host != null); 1169 } 1170 1171 /** 1172 * Determine whether a scheme conforms to the rules for a scheme name. 1173 * A scheme is conformant if it starts with an alphanumeric, and 1174 * contains only alphanumerics, '+','-' and '.'. 1175 * 1176 * @return true if the scheme is conformant, false otherwise 1177 */ 1178 public static boolean isConformantSchemeName(String p_scheme) { 1179 if (p_scheme == null || p_scheme.trim().length() == 0) { 1180 return false; 1181 } 1182 1183 if (!isAlpha(p_scheme.charAt(0))) { 1184 return false; 1185 } 1186 1187 char testChar; 1188 for (int i = 1; i < p_scheme.length(); i++) { 1189 testChar = p_scheme.charAt(i); 1190 if (!isAlphanum(testChar) && 1191 SCHEME_CHARACTERS.indexOf(testChar) == -1) { 1192 return false; 1193 } 1194 } 1195 1196 return true; 1197 } 1198 1199 /** 1200 * Determine whether a string is syntactically capable of representing 1201 * a valid IPv4 address or the domain name of a network host. A valid 1202 * IPv4 address consists of four decimal digit groups separated by a 1203 * '.'. A hostname consists of domain labels (each of which must 1204 * begin and end with an alphanumeric but may contain '-') separated 1205 & by a '.'. See RFC 2396 Section 3.2.2. 1206 * 1207 * @return true if the string is a syntactically valid IPv4 address 1208 * or hostname 1209 */ 1210 public static boolean isWellFormedAddress(String p_address) { 1211 if (p_address == null) { 1212 return false; 1213 } 1214 1215 String address = p_address.trim(); 1216 int addrLength = address.length(); 1217 if (addrLength == 0 || addrLength > 255) { 1218 return false; 1219 } 1220 1221 if (address.startsWith(".") || address.startsWith("-")) { 1222 return false; 1223 } 1224 1225 // rightmost domain label starting with digit indicates IP address 1226 // since top level domain label can only start with an alpha 1227 // see RFC 2396 Section 3.2.2 1228 int index = address.lastIndexOf('.'); 1229 if (address.endsWith(".")) { 1230 index = address.substring(0, index).lastIndexOf('.'); 1231 } 1232 1233 if (index+1 < addrLength && isDigit(p_address.charAt(index+1))) { 1234 char testChar; 1235 int numDots = 0; 1236 1237 // make sure that 1) we see only digits and dot separators, 2) that 1238 // any dot separator is preceded and followed by a digit and 1239 // 3) that we find 3 dots 1240 for (int i = 0; i < addrLength; i++) { 1241 testChar = address.charAt(i); 1242 if (testChar == '.') { 1243 if (!isDigit(address.charAt(i-1)) || 1244 (i+1 < addrLength && !isDigit(address.charAt(i+1)))) { 1245 return false; 1246 } 1247 numDots++; 1248 } 1249 else if (!isDigit(testChar)) { 1250 return false; 1251 } 1252 } 1253 if (numDots != 3) { 1254 return false; 1255 } 1256 } 1257 else { 1258 // domain labels can contain alphanumerics and '-" 1259 // but must start and end with an alphanumeric 1260 char testChar; 1261 1262 for (int i = 0; i < addrLength; i++) { 1263 testChar = address.charAt(i); 1264 if (testChar == '.') { 1265 if (!isAlphanum(address.charAt(i-1))) { 1266 return false; 1267 } 1268 if (i+1 < addrLength && !isAlphanum(address.charAt(i+1))) { 1269 return false; 1270 } 1271 } 1272 else if (!isAlphanum(testChar) && testChar != '-') { 1273 return false; 1274 } 1275 } 1276 } 1277 return true; 1278 } 1279 1280 1281 /** 1282 * Determine whether a char is a digit. 1283 * 1284 * @return true if the char is betweeen '0' and '9', false otherwise 1285 */ 1286 private static boolean isDigit(char p_char) { 1287 return p_char >= '0' && p_char <= '9'; 1288 } 1289 1290 /** 1291 * Determine whether a character is a hexadecimal character. 1292 * 1293 * @return true if the char is betweeen '0' and '9', 'a' and 'f' 1294 * or 'A' and 'F', false otherwise 1295 */ 1296 private static boolean isHex(char p_char) { 1297 return (isDigit(p_char) || 1298 (p_char >= 'a' && p_char <= 'f') || 1299 (p_char >= 'A' && p_char <= 'F')); 1300 } 1301 1302 /** 1303 * Determine whether a char is an alphabetic character: a-z or A-Z 1304 * 1305 * @return true if the char is alphabetic, false otherwise 1306 */ 1307 private static boolean isAlpha(char p_char) { 1308 return ((p_char >= 'a' && p_char <= 'z') || 1309 (p_char >= 'A' && p_char <= 'Z' )); 1310 } 1311 1312 /** 1313 * Determine whether a char is an alphanumeric: 0-9, a-z or A-Z 1314 * 1315 * @return true if the char is alphanumeric, false otherwise 1316 */ 1317 private static boolean isAlphanum(char p_char) { 1318 return (isAlpha(p_char) || isDigit(p_char)); 1319 } 1320 1321 /** 1322 * Determine whether a character is a reserved character: 1323 * ';', '/', '?', ':', '@', '&', '=', '+', '$' or ',' 1324 * 1325 * @return true if the string contains any reserved characters 1326 */ 1327 private static boolean isReservedCharacter(char p_char) { 1328 return RESERVED_CHARACTERS.indexOf(p_char) != -1; 1329 } 1330 1331 /** 1332 * Determine whether a char is an unreserved character. 1333 * 1334 * @return true if the char is unreserved, false otherwise 1335 */ 1336 private static boolean isUnreservedCharacter(char p_char) { 1337 return (isAlphanum(p_char) || 1338 MARK_CHARACTERS.indexOf(p_char) != -1); 1339 } 1340 1341 /** 1342 * Determine whether a given string contains only URI characters (also 1343 * called "uric" in RFC 2396). uric consist of all reserved 1344 * characters, unreserved characters and escaped characters. 1345 * 1346 * @return true if the string is comprised of uric, false otherwise 1347 */ 1348 private static boolean isURIString(String p_uric) { 1349 if (p_uric == null) { 1350 return false; 1351 } 1352 int end = p_uric.length(); 1353 char testChar = '\0'; 1354 for (int i = 0; i < end; i++) { 1355 testChar = p_uric.charAt(i); 1356 if (testChar == '%') { 1357 if (i+2 >= end || 1358 !isHex(p_uric.charAt(i+1)) || 1359 !isHex(p_uric.charAt(i+2))) { 1360 return false; 1361 } 1362 else { 1363 i += 2; 1364 continue; 1365 } 1366 } 1367 if (isReservedCharacter(testChar) || 1368 isUnreservedCharacter(testChar)) { 1369 continue; 1370 } 1371 else { 1372 return false; 1373 } 1374 } 1375 return true; 1376 } 1377 }