1 /* 2 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package com.sun.xml.internal.messaging.saaj.util; 27 28 // Imported from: org.apache.xerces.util 29 // Needed to work around differences in JDK1.2 and 1.3 and deal with userInfo 30 31 import java.io.IOException; 32 import java.io.Serializable; 33 34 35 /********************************************************************** 36 * A class to represent a Uniform Resource Identifier (URI). This class 37 * is designed to handle the parsing of URIs and provide access to 38 * the various components (scheme, host, port, userinfo, path, query 39 * string and fragment) that may constitute a URI. 40 * <p> 41 * Parsing of a URI specification is done according to the URI 42 * syntax described in <a href="http://www.ietf.org/rfc/rfc2396.txt?number=2396"> 43 * RFC 2396</a>. Every URI consists 44 * of a scheme, followed by a colon (':'), followed by a scheme-specific 45 * part. For URIs that follow the "generic URI" syntax, the scheme- 46 * specific part begins with two slashes ("//") and may be followed 47 * by an authority segment (comprised of user information, host, and 48 * port), path segment, query segment and fragment. Note that RFC 2396 49 * no longer specifies the use of the parameters segment and excludes 50 * the "user:password" syntax as part of the authority segment. If 51 * "user:password" appears in a URI, the entire user/password string 52 * is stored as userinfo. 53 * <p> 54 * For URIs that do not follow the "generic URI" syntax (e.g. mailto), 55 * the entire scheme-specific part is treated as the "path" portion 56 * of the URI. 57 * <p> 58 * Note that, unlike the java.net.URL class, this class does not provide 59 * any built-in network access functionality nor does it provide any 60 * scheme-specific functionality (for example, it does not know a 61 * default port for a specific scheme). Rather, it only knows the 62 * grammar and basic set of operations that can be applied to a URI. 63 * 64 **********************************************************************/ 65 public class JaxmURI implements Serializable { 66 67 /******************************************************************* 68 * MalformedURIExceptions are thrown in the process of building a URI 69 * or setting fields on a URI when an operation would result in an 70 * invalid URI specification. 71 * 72 ********************************************************************/ 73 public static class MalformedURIException extends IOException { 74 75 /****************************************************************** 76 * Constructs a <code>MalformedURIException</code> with no specified 77 * detail message. 78 ******************************************************************/ 79 public MalformedURIException() { 80 super(); 81 } 82 83 /***************************************************************** 84 * Constructs a <code>MalformedURIException</code> with the 85 * specified detail message. 86 * 87 * @param p_msg the detail message. 88 ******************************************************************/ 89 public MalformedURIException(String p_msg) { 90 super(p_msg); 91 } 92 } 93 94 /** reserved characters */ 95 private static final String RESERVED_CHARACTERS = ";/?:@&=+$,"; 96 97 /** URI punctuation mark characters - these, combined with 98 alphanumerics, constitute the "unreserved" characters */ 99 private static final String MARK_CHARACTERS = "-_.!~*'() "; 100 101 /** scheme can be composed of alphanumerics and these characters */ 102 private static final String SCHEME_CHARACTERS = "+-."; 103 104 /** userinfo can be composed of unreserved, escaped and these 105 characters */ 106 private static final String USERINFO_CHARACTERS = ";:&=+$,"; 107 108 /** Stores the scheme (usually the protocol) for this URI. */ 109 private String m_scheme = null; 110 111 /** If specified, stores the userinfo for this URI; otherwise null */ 112 private String m_userinfo = null; 113 114 /** If specified, stores the host for this URI; otherwise null */ 115 private String m_host = null; 116 117 /** If specified, stores the port for this URI; otherwise -1 */ 118 private int m_port = -1; 119 120 /** If specified, stores the path for this URI; otherwise null */ 121 private String m_path = null; 122 123 /** If specified, stores the query string for this URI; otherwise 124 null. */ 125 private String m_queryString = null; 126 127 /** If specified, stores the fragment for this URI; otherwise null */ 128 private String m_fragment = null; 129 130 /** 131 * Construct a new and uninitialized URI. 132 */ 133 public JaxmURI() { 134 } 135 136 /** 137 * Construct a new URI from another URI. All fields for this URI are 138 * set equal to the fields of the URI passed in. 139 * 140 * @param p_other the URI to copy (cannot be null) 141 */ 142 public JaxmURI(JaxmURI p_other) { 143 initialize(p_other); 144 } 145 146 /** 147 * Construct a new URI from a URI specification string. If the 148 * specification follows the "generic URI" syntax, (two slashes 149 * following the first colon), the specification will be parsed 150 * accordingly - setting the scheme, userinfo, host,port, path, query 151 * string and fragment fields as necessary. If the specification does 152 * not follow the "generic URI" syntax, the specification is parsed 153 * into a scheme and scheme-specific part (stored as the path) only. 154 * 155 * @param p_uriSpec the URI specification string (cannot be null or 156 * empty) 157 * 158 * @exception MalformedURIException if p_uriSpec violates any syntax 159 * rules 160 */ 161 public JaxmURI(String p_uriSpec) throws MalformedURIException { 162 this((JaxmURI)null, p_uriSpec); 163 } 164 165 /** 166 * Construct a new URI from a base URI and a URI specification string. 167 * The URI specification string may be a relative URI. 168 * 169 * @param p_base the base URI (cannot be null if p_uriSpec is null or 170 * empty) 171 * @param p_uriSpec the URI specification string (cannot be null or 172 * empty if p_base is null) 173 * 174 * @exception MalformedURIException if p_uriSpec violates any syntax 175 * rules 176 */ 177 public JaxmURI(JaxmURI p_base, String p_uriSpec) throws MalformedURIException { 178 initialize(p_base, p_uriSpec); 179 } 180 181 /** 182 * Construct a new URI that does not follow the generic URI syntax. 183 * Only the scheme and scheme-specific part (stored as the path) are 184 * initialized. 185 * 186 * @param p_scheme the URI scheme (cannot be null or empty) 187 * @param p_schemeSpecificPart the scheme-specific part (cannot be 188 * null or empty) 189 * 190 * @exception MalformedURIException if p_scheme violates any 191 * syntax rules 192 */ 193 public JaxmURI(String p_scheme, String p_schemeSpecificPart) 194 throws MalformedURIException { 195 if (p_scheme == null || p_scheme.trim().length() == 0) { 196 throw new MalformedURIException( 197 "Cannot construct URI with null/empty scheme!"); 198 } 199 if (p_schemeSpecificPart == null || 200 p_schemeSpecificPart.trim().length() == 0) { 201 throw new MalformedURIException( 202 "Cannot construct URI with null/empty scheme-specific part!"); 203 } 204 setScheme(p_scheme); 205 setPath(p_schemeSpecificPart); 206 } 207 208 /** 209 * Construct a new URI that follows the generic URI syntax from its 210 * component parts. Each component is validated for syntax and some 211 * basic semantic checks are performed as well. See the individual 212 * setter methods for specifics. 213 * 214 * @param p_scheme the URI scheme (cannot be null or empty) 215 * @param p_host the hostname or IPv4 address for the URI 216 * @param p_path the URI path - if the path contains '?' or '#', 217 * then the query string and/or fragment will be 218 * set from the path; however, if the query and 219 * fragment are specified both in the path and as 220 * separate parameters, an exception is thrown 221 * @param p_queryString the URI query string (cannot be specified 222 * if path is null) 223 * @param p_fragment the URI fragment (cannot be specified if path 224 * is null) 225 * 226 * @exception MalformedURIException if any of the parameters violates 227 * syntax rules or semantic rules 228 */ 229 public JaxmURI(String p_scheme, String p_host, String p_path, 230 String p_queryString, String p_fragment) 231 throws MalformedURIException { 232 this(p_scheme, null, p_host, -1, p_path, p_queryString, p_fragment); 233 } 234 235 /** 236 * Construct a new URI that follows the generic URI syntax from its 237 * component parts. Each component is validated for syntax and some 238 * basic semantic checks are performed as well. See the individual 239 * setter methods for specifics. 240 * 241 * @param p_scheme the URI scheme (cannot be null or empty) 242 * @param p_userinfo the URI userinfo (cannot be specified if host 243 * is null) 244 * @param p_host the hostname or IPv4 address for the URI 245 * @param p_port the URI port (may be -1 for "unspecified"; cannot 246 * be specified if host is null) 247 * @param p_path the URI path - if the path contains '?' or '#', 248 * then the query string and/or fragment will be 249 * set from the path; however, if the query and 250 * fragment are specified both in the path and as 251 * separate parameters, an exception is thrown 252 * @param p_queryString the URI query string (cannot be specified 253 * if path is null) 254 * @param p_fragment the URI fragment (cannot be specified if path 255 * is null) 256 * 257 * @exception MalformedURIException if any of the parameters violates 258 * syntax rules or semantic rules 259 */ 260 public JaxmURI(String p_scheme, String p_userinfo, 261 String p_host, int p_port, String p_path, 262 String p_queryString, String p_fragment) 263 throws MalformedURIException { 264 if (p_scheme == null || p_scheme.trim().length() == 0) { 265 throw new MalformedURIException("Scheme is required!"); 266 } 267 268 if (p_host == null) { 269 if (p_userinfo != null) { 270 throw new MalformedURIException( 271 "Userinfo may not be specified if host is not specified!"); 272 } 273 if (p_port != -1) { 274 throw new MalformedURIException( 275 "Port may not be specified if host is not specified!"); 276 } 277 } 278 279 if (p_path != null) { 280 if (p_path.indexOf('?') != -1 && p_queryString != null) { 281 throw new MalformedURIException( 282 "Query string cannot be specified in path and query string!"); 283 } 284 285 if (p_path.indexOf('#') != -1 && p_fragment != null) { 286 throw new MalformedURIException( 287 "Fragment cannot be specified in both the path and fragment!"); 288 } 289 } 290 291 setScheme(p_scheme); 292 setHost(p_host); 293 setPort(p_port); 294 setUserinfo(p_userinfo); 295 setPath(p_path); 296 setQueryString(p_queryString); 297 setFragment(p_fragment); 298 } 299 300 /** 301 * Initialize all fields of this URI from another URI. 302 * 303 * @param p_other the URI to copy (cannot be null) 304 */ 305 private void initialize(JaxmURI p_other) { 306 m_scheme = p_other.getScheme(); 307 m_userinfo = p_other.getUserinfo(); 308 m_host = p_other.getHost(); 309 m_port = p_other.getPort(); 310 m_path = p_other.getPath(); 311 m_queryString = p_other.getQueryString(); 312 m_fragment = p_other.getFragment(); 313 } 314 315 /** 316 * Initializes this URI from a base URI and a URI specification string. 317 * See RFC 2396 Section 4 and Appendix B for specifications on parsing 318 * the URI and Section 5 for specifications on resolving relative URIs 319 * and relative paths. 320 * 321 * @param p_base the base URI (may be null if p_uriSpec is an absolute 322 * URI) 323 * @param p_uriSpec the URI spec string which may be an absolute or 324 * relative URI (can only be null/empty if p_base 325 * is not null) 326 * 327 * @exception MalformedURIException if p_base is null and p_uriSpec 328 * is not an absolute URI or if 329 * p_uriSpec violates syntax rules 330 */ 331 private void initialize(JaxmURI p_base, String p_uriSpec) 332 throws MalformedURIException { 333 if (p_base == null && 334 (p_uriSpec == null || p_uriSpec.trim().length() == 0)) { 335 throw new MalformedURIException( 336 "Cannot initialize URI with empty parameters."); 337 } 338 339 // just make a copy of the base if spec is empty 340 if (p_uriSpec == null || p_uriSpec.trim().length() == 0) { 341 initialize(p_base); 342 return; 343 } 344 345 String uriSpec = p_uriSpec.trim(); 346 int uriSpecLen = uriSpec.length(); 347 int index = 0; 348 349 // Check for scheme, which must be before `/'. Also handle names with 350 // DOS drive letters ('D:'), so 1-character schemes are not allowed. 351 int colonIdx = uriSpec.indexOf(':'); 352 int slashIdx = uriSpec.indexOf('/'); 353 if ((colonIdx < 2) || (colonIdx > slashIdx && slashIdx != -1)) { 354 int fragmentIdx = uriSpec.indexOf('#'); 355 // A standalone base is a valid URI according to spec 356 if (p_base == null && fragmentIdx != 0 ) { 357 throw new MalformedURIException("No scheme found in URI."); 358 } 359 } 360 else { 361 initializeScheme(uriSpec); 362 index = m_scheme.length()+1; 363 } 364 365 // two slashes means generic URI syntax, so we get the authority 366 if (((index+1) < uriSpecLen) && 367 (uriSpec.substring(index).startsWith("//"))) { 368 index += 2; 369 int startPos = index; 370 371 // get authority - everything up to path, query or fragment 372 char testChar = '\0'; 373 while (index < uriSpecLen) { 374 testChar = uriSpec.charAt(index); 375 if (testChar == '/' || testChar == '?' || testChar == '#') { 376 break; 377 } 378 index++; 379 } 380 381 // if we found authority, parse it out, otherwise we set the 382 // host to empty string 383 if (index > startPos) { 384 initializeAuthority(uriSpec.substring(startPos, index)); 385 } 386 else { 387 m_host = ""; 388 } 389 } 390 391 initializePath(uriSpec.substring(index)); 392 393 // Resolve relative URI to base URI - see RFC 2396 Section 5.2 394 // In some cases, it might make more sense to throw an exception 395 // (when scheme is specified is the string spec and the base URI 396 // is also specified, for example), but we're just following the 397 // RFC specifications 398 if (p_base != null) { 399 400 // check to see if this is the current doc - RFC 2396 5.2 #2 401 // note that this is slightly different from the RFC spec in that 402 // we don't include the check for query string being null 403 // - this handles cases where the urispec is just a query 404 // string or a fragment (e.g. "?y" or "#s") - 405 // see <http://www.ics.uci.edu/~fielding/url/test1.html> which 406 // identified this as a bug in the RFC 407 if (m_path.length() == 0 && m_scheme == null && 408 m_host == null) { 409 m_scheme = p_base.getScheme(); 410 m_userinfo = p_base.getUserinfo(); 411 m_host = p_base.getHost(); 412 m_port = p_base.getPort(); 413 m_path = p_base.getPath(); 414 415 if (m_queryString == null) { 416 m_queryString = p_base.getQueryString(); 417 } 418 return; 419 } 420 421 // check for scheme - RFC 2396 5.2 #3 422 // if we found a scheme, it means absolute URI, so we're done 423 if (m_scheme == null) { 424 m_scheme = p_base.getScheme(); 425 } 426 else { 427 return; 428 } 429 430 // check for authority - RFC 2396 5.2 #4 431 // if we found a host, then we've got a network path, so we're done 432 if (m_host == null) { 433 m_userinfo = p_base.getUserinfo(); 434 m_host = p_base.getHost(); 435 m_port = p_base.getPort(); 436 } 437 else { 438 return; 439 } 440 441 // check for absolute path - RFC 2396 5.2 #5 442 if (m_path.length() > 0 && 443 m_path.startsWith("/")) { 444 return; 445 } 446 447 // if we get to this point, we need to resolve relative path 448 // RFC 2396 5.2 #6 449 String path = ""; 450 String basePath = p_base.getPath(); 451 452 // 6a - get all but the last segment of the base URI path 453 if (basePath != null) { 454 int lastSlash = basePath.lastIndexOf('/'); 455 if (lastSlash != -1) { 456 path = basePath.substring(0, lastSlash+1); 457 } 458 } 459 460 // 6b - append the relative URI path 461 path = path.concat(m_path); 462 463 // 6c - remove all "./" where "." is a complete path segment 464 index = -1; 465 while ((index = path.indexOf("/./")) != -1) { 466 path = path.substring(0, index+1).concat(path.substring(index+3)); 467 } 468 469 // 6d - remove "." if path ends with "." as a complete path segment 470 if (path.endsWith("/.")) { 471 path = path.substring(0, path.length()-1); 472 } 473 474 // 6e - remove all "<segment>/../" where "<segment>" is a complete 475 // path segment not equal to ".." 476 index = 1; 477 int segIndex = -1; 478 String tempString = null; 479 480 while ((index = path.indexOf("/../", index)) > 0) { 481 tempString = path.substring(0, path.indexOf("/../")); 482 segIndex = tempString.lastIndexOf('/'); 483 if (segIndex != -1) { 484 if (!tempString.substring(segIndex++).equals("..")) { 485 path = path.substring(0, segIndex).concat(path.substring(index+4)); 486 } 487 else 488 index += 4; 489 } 490 else 491 index += 4; 492 } 493 494 // 6f - remove ending "<segment>/.." where "<segment>" is a 495 // complete path segment 496 if (path.endsWith("/..")) { 497 tempString = path.substring(0, path.length()-3); 498 segIndex = tempString.lastIndexOf('/'); 499 if (segIndex != -1) { 500 path = path.substring(0, segIndex+1); 501 } 502 } 503 m_path = path; 504 } 505 } 506 507 /** 508 * Initialize the scheme for this URI from a URI string spec. 509 * 510 * @param p_uriSpec the URI specification (cannot be null) 511 * 512 * @exception MalformedURIException if URI does not have a conformant 513 * scheme 514 */ 515 private void initializeScheme(String p_uriSpec) 516 throws MalformedURIException { 517 int uriSpecLen = p_uriSpec.length(); 518 int index = 0; 519 String scheme = null; 520 char testChar = '\0'; 521 522 while (index < uriSpecLen) { 523 testChar = p_uriSpec.charAt(index); 524 if (testChar == ':' || testChar == '/' || 525 testChar == '?' || testChar == '#') { 526 break; 527 } 528 index++; 529 } 530 scheme = p_uriSpec.substring(0, index); 531 532 if (scheme.length() == 0) { 533 throw new MalformedURIException("No scheme found in URI."); 534 } 535 else { 536 setScheme(scheme); 537 } 538 } 539 540 /** 541 * Initialize the authority (userinfo, host and port) for this 542 * URI from a URI string spec. 543 * 544 * @param p_uriSpec the URI specification (cannot be null) 545 * 546 * @exception MalformedURIException if p_uriSpec violates syntax rules 547 */ 548 private void initializeAuthority(String p_uriSpec) 549 throws MalformedURIException { 550 int index = 0; 551 int start = 0; 552 int end = p_uriSpec.length(); 553 char testChar = '\0'; 554 String userinfo = null; 555 556 // userinfo is everything up @ 557 if (p_uriSpec.indexOf('@', start) != -1) { 558 while (index < end) { 559 testChar = p_uriSpec.charAt(index); 560 if (testChar == '@') { 561 break; 562 } 563 index++; 564 } 565 userinfo = p_uriSpec.substring(start, index); 566 index++; 567 } 568 569 // host is everything up to ':' 570 String host = null; 571 start = index; 572 while (index < end) { 573 testChar = p_uriSpec.charAt(index); 574 if (testChar == ':') { 575 break; 576 } 577 index++; 578 } 579 host = p_uriSpec.substring(start, index); 580 int port = -1; 581 if (host.length() > 0) { 582 // port 583 if (testChar == ':') { 584 index++; 585 start = index; 586 while (index < end) { 587 index++; 588 } 589 String portStr = p_uriSpec.substring(start, index); 590 if (portStr.length() > 0) { 591 for (int i = 0; i < portStr.length(); i++) { 592 if (!isDigit(portStr.charAt(i))) { 593 throw new MalformedURIException( 594 portStr + 595 " is invalid. Port should only contain digits!"); 596 } 597 } 598 try { 599 port = Integer.parseInt(portStr); 600 } 601 catch (NumberFormatException nfe) { 602 // can't happen 603 } 604 } 605 } 606 } 607 setHost(host); 608 setPort(port); 609 setUserinfo(userinfo); 610 } 611 612 /** 613 * Initialize the path for this URI from a URI string spec. 614 * 615 * @param p_uriSpec the URI specification (cannot be null) 616 * 617 * @exception MalformedURIException if p_uriSpec violates syntax rules 618 */ 619 private void initializePath(String p_uriSpec) 620 throws MalformedURIException { 621 if (p_uriSpec == null) { 622 throw new MalformedURIException( 623 "Cannot initialize path from null string!"); 624 } 625 626 int index = 0; 627 int start = 0; 628 int end = p_uriSpec.length(); 629 char testChar = '\0'; 630 631 // path - everything up to query string or fragment 632 while (index < end) { 633 testChar = p_uriSpec.charAt(index); 634 if (testChar == '?' || testChar == '#') { 635 break; 636 } 637 // check for valid escape sequence 638 if (testChar == '%') { 639 if (index+2 >= end || 640 !isHex(p_uriSpec.charAt(index+1)) || 641 !isHex(p_uriSpec.charAt(index+2))) { 642 throw new MalformedURIException( 643 "Path contains invalid escape sequence!"); 644 } 645 } 646 else if (!isReservedCharacter(testChar) && 647 !isUnreservedCharacter(testChar)) { 648 throw new MalformedURIException( 649 "Path contains invalid character: " + testChar); 650 } 651 index++; 652 } 653 m_path = p_uriSpec.substring(start, index); 654 655 // query - starts with ? and up to fragment or end 656 if (testChar == '?') { 657 index++; 658 start = index; 659 while (index < end) { 660 testChar = p_uriSpec.charAt(index); 661 if (testChar == '#') { 662 break; 663 } 664 if (testChar == '%') { 665 if (index+2 >= end || 666 !isHex(p_uriSpec.charAt(index+1)) || 667 !isHex(p_uriSpec.charAt(index+2))) { 668 throw new MalformedURIException( 669 "Query string contains invalid escape sequence!"); 670 } 671 } 672 else if (!isReservedCharacter(testChar) && 673 !isUnreservedCharacter(testChar)) { 674 throw new MalformedURIException( 675 "Query string contains invalid character:" + testChar); 676 } 677 index++; 678 } 679 m_queryString = p_uriSpec.substring(start, index); 680 } 681 682 // fragment - starts with # 683 if (testChar == '#') { 684 index++; 685 start = index; 686 while (index < end) { 687 testChar = p_uriSpec.charAt(index); 688 689 if (testChar == '%') { 690 if (index+2 >= end || 691 !isHex(p_uriSpec.charAt(index+1)) || 692 !isHex(p_uriSpec.charAt(index+2))) { 693 throw new MalformedURIException( 694 "Fragment contains invalid escape sequence!"); 695 } 696 } 697 else if (!isReservedCharacter(testChar) && 698 !isUnreservedCharacter(testChar)) { 699 throw new MalformedURIException( 700 "Fragment contains invalid character:"+testChar); 701 } 702 index++; 703 } 704 m_fragment = p_uriSpec.substring(start, index); 705 } 706 } 707 708 /** 709 * Get the scheme for this URI. 710 * 711 * @return the scheme for this URI 712 */ 713 public String getScheme() { 714 return m_scheme; 715 } 716 717 /** 718 * Get the scheme-specific part for this URI (everything following the 719 * scheme and the first colon). See RFC 2396 Section 5.2 for spec. 720 * 721 * @return the scheme-specific part for this URI 722 */ 723 public String getSchemeSpecificPart() { 724 StringBuilder schemespec = new StringBuilder(); 725 726 if (m_userinfo != null || m_host != null || m_port != -1) { 727 schemespec.append("//"); 728 } 729 730 if (m_userinfo != null) { 731 schemespec.append(m_userinfo); 732 schemespec.append('@'); 733 } 734 735 if (m_host != null) { 736 schemespec.append(m_host); 737 } 738 739 if (m_port != -1) { 740 schemespec.append(':'); 741 schemespec.append(m_port); 742 } 743 744 if (m_path != null) { 745 schemespec.append((m_path)); 746 } 747 748 if (m_queryString != null) { 749 schemespec.append('?'); 750 schemespec.append(m_queryString); 751 } 752 753 if (m_fragment != null) { 754 schemespec.append('#'); 755 schemespec.append(m_fragment); 756 } 757 758 return schemespec.toString(); 759 } 760 761 /** 762 * Get the userinfo for this URI. 763 * 764 * @return the userinfo for this URI (null if not specified). 765 */ 766 public String getUserinfo() { 767 return m_userinfo; 768 } 769 770 /** 771 * Get the host for this URI. 772 * 773 * @return the host for this URI (null if not specified). 774 */ 775 public String getHost() { 776 return m_host; 777 } 778 779 /** 780 * Get the port for this URI. 781 * 782 * @return the port for this URI (-1 if not specified). 783 */ 784 public int getPort() { 785 return m_port; 786 } 787 788 /** 789 * Get the path for this URI (optionally with the query string and 790 * fragment). 791 * 792 * @param p_includeQueryString if true (and query string is not null), 793 * then a "?" followed by the query string 794 * will be appended 795 * @param p_includeFragment if true (and fragment is not null), 796 * then a "#" followed by the fragment 797 * will be appended 798 * 799 * @return the path for this URI possibly including the query string 800 * and fragment 801 */ 802 public String getPath(boolean p_includeQueryString, 803 boolean p_includeFragment) { 804 StringBuilder pathString = new StringBuilder(m_path); 805 806 if (p_includeQueryString && m_queryString != null) { 807 pathString.append('?'); 808 pathString.append(m_queryString); 809 } 810 811 if (p_includeFragment && m_fragment != null) { 812 pathString.append('#'); 813 pathString.append(m_fragment); 814 } 815 return pathString.toString(); 816 } 817 818 /** 819 * Get the path for this URI. Note that the value returned is the path 820 * only and does not include the query string or fragment. 821 * 822 * @return the path for this URI. 823 */ 824 public String getPath() { 825 return m_path; 826 } 827 828 /** 829 * Get the query string for this URI. 830 * 831 * @return the query string for this URI. Null is returned if there 832 * was no "?" in the URI spec, empty string if there was a 833 * "?" but no query string following it. 834 */ 835 public String getQueryString() { 836 return m_queryString; 837 } 838 839 /** 840 * Get the fragment for this URI. 841 * 842 * @return the fragment for this URI. Null is returned if there 843 * was no "#" in the URI spec, empty string if there was a 844 * "#" but no fragment following it. 845 */ 846 public String getFragment() { 847 return m_fragment; 848 } 849 850 /** 851 * Set the scheme for this URI. The scheme is converted to lowercase 852 * before it is set. 853 * 854 * @param p_scheme the scheme for this URI (cannot be null) 855 * 856 * @exception MalformedURIException if p_scheme is not a conformant 857 * scheme name 858 */ 859 public void setScheme(String p_scheme) throws MalformedURIException { 860 if (p_scheme == null) { 861 throw new MalformedURIException( 862 "Cannot set scheme from null string!"); 863 } 864 if (!isConformantSchemeName(p_scheme)) { 865 throw new MalformedURIException("The scheme is not conformant."); 866 } 867 868 m_scheme = p_scheme.toLowerCase(); 869 } 870 871 /** 872 * Set the userinfo for this URI. If a non-null value is passed in and 873 * the host value is null, then an exception is thrown. 874 * 875 * @param p_userinfo the userinfo for this URI 876 * 877 * @exception MalformedURIException if p_userinfo contains invalid 878 * characters 879 */ 880 public void setUserinfo(String p_userinfo) throws MalformedURIException { 881 if (p_userinfo == null) { 882 m_userinfo = null; 883 } 884 else { 885 if (m_host == null) { 886 throw new MalformedURIException( 887 "Userinfo cannot be set when host is null!"); 888 } 889 890 // userinfo can contain alphanumerics, mark characters, escaped 891 // and ';',':','&','=','+','$',',' 892 int index = 0; 893 int end = p_userinfo.length(); 894 char testChar = '\0'; 895 while (index < end) { 896 testChar = p_userinfo.charAt(index); 897 if (testChar == '%') { 898 if (index+2 >= end || 899 !isHex(p_userinfo.charAt(index+1)) || 900 !isHex(p_userinfo.charAt(index+2))) { 901 throw new MalformedURIException( 902 "Userinfo contains invalid escape sequence!"); 903 } 904 } 905 else if (!isUnreservedCharacter(testChar) && 906 USERINFO_CHARACTERS.indexOf(testChar) == -1) { 907 throw new MalformedURIException( 908 "Userinfo contains invalid character:"+testChar); 909 } 910 index++; 911 } 912 } 913 m_userinfo = p_userinfo; 914 } 915 916 /** 917 * Set the host for this URI. If null is passed in, the userinfo 918 * field is also set to null and the port is set to -1. 919 * 920 * @param p_host the host for this URI 921 * 922 * @exception MalformedURIException if p_host is not a valid IP 923 * address or DNS hostname. 924 */ 925 public void setHost(String p_host) throws MalformedURIException { 926 if (p_host == null || p_host.trim().length() == 0) { 927 m_host = p_host; 928 m_userinfo = null; 929 m_port = -1; 930 } 931 else if (!isWellFormedAddress(p_host)) { 932 throw new MalformedURIException("Host is not a well formed address!"); 933 } 934 m_host = p_host; 935 } 936 937 /** 938 * Set the port for this URI. -1 is used to indicate that the port is 939 * not specified, otherwise valid port numbers are between 0 and 65535. 940 * If a valid port number is passed in and the host field is null, 941 * an exception is thrown. 942 * 943 * @param p_port the port number for this URI 944 * 945 * @exception MalformedURIException if p_port is not -1 and not a 946 * valid port number 947 */ 948 public void setPort(int p_port) throws MalformedURIException { 949 if (p_port >= 0 && p_port <= 65535) { 950 if (m_host == null) { 951 throw new MalformedURIException( 952 "Port cannot be set when host is null!"); 953 } 954 } 955 else if (p_port != -1) { 956 throw new MalformedURIException("Invalid port number!"); 957 } 958 m_port = p_port; 959 } 960 961 /** 962 * Set the path for this URI. If the supplied path is null, then the 963 * query string and fragment are set to null as well. If the supplied 964 * path includes a query string and/or fragment, these fields will be 965 * parsed and set as well. Note that, for URIs following the "generic 966 * URI" syntax, the path specified should start with a slash. 967 * For URIs that do not follow the generic URI syntax, this method 968 * sets the scheme-specific part. 969 * 970 * @param p_path the path for this URI (may be null) 971 * 972 * @exception MalformedURIException if p_path contains invalid 973 * characters 974 */ 975 public void setPath(String p_path) throws MalformedURIException { 976 if (p_path == null) { 977 m_path = null; 978 m_queryString = null; 979 m_fragment = null; 980 } 981 else { 982 initializePath(p_path); 983 } 984 } 985 986 /** 987 * Append to the end of the path of this URI. If the current path does 988 * not end in a slash and the path to be appended does not begin with 989 * a slash, a slash will be appended to the current path before the 990 * new segment is added. Also, if the current path ends in a slash 991 * and the new segment begins with a slash, the extra slash will be 992 * removed before the new segment is appended. 993 * 994 * @param p_addToPath the new segment to be added to the current path 995 * 996 * @exception MalformedURIException if p_addToPath contains syntax 997 * errors 998 */ 999 public void appendPath(String p_addToPath) 1000 throws MalformedURIException { 1001 if (p_addToPath == null || p_addToPath.trim().length() == 0) { 1002 return; 1003 } 1004 1005 if (!isURIString(p_addToPath)) { 1006 throw new MalformedURIException( 1007 "Path contains invalid character!"); 1008 } 1009 1010 if (m_path == null || m_path.trim().length() == 0) { 1011 if (p_addToPath.startsWith("/")) { 1012 m_path = p_addToPath; 1013 } 1014 else { 1015 m_path = "/" + p_addToPath; 1016 } 1017 } 1018 else if (m_path.endsWith("/")) { 1019 if (p_addToPath.startsWith("/")) { 1020 m_path = m_path.concat(p_addToPath.substring(1)); 1021 } 1022 else { 1023 m_path = m_path.concat(p_addToPath); 1024 } 1025 } 1026 else { 1027 if (p_addToPath.startsWith("/")) { 1028 m_path = m_path.concat(p_addToPath); 1029 } 1030 else { 1031 m_path = m_path.concat("/" + p_addToPath); 1032 } 1033 } 1034 } 1035 1036 /** 1037 * Set the query string for this URI. A non-null value is valid only 1038 * if this is an URI conforming to the generic URI syntax and 1039 * the path value is not null. 1040 * 1041 * @param p_queryString the query string for this URI 1042 * 1043 * @exception MalformedURIException if p_queryString is not null and this 1044 * URI does not conform to the generic 1045 * URI syntax or if the path is null 1046 */ 1047 public void setQueryString(String p_queryString) throws MalformedURIException { 1048 if (p_queryString == null) { 1049 m_queryString = null; 1050 } 1051 else if (!isGenericURI()) { 1052 throw new MalformedURIException( 1053 "Query string can only be set for a generic URI!"); 1054 } 1055 else if (getPath() == null) { 1056 throw new MalformedURIException( 1057 "Query string cannot be set when path is null!"); 1058 } 1059 else if (!isURIString(p_queryString)) { 1060 throw new MalformedURIException( 1061 "Query string contains invalid character!"); 1062 } 1063 else { 1064 m_queryString = p_queryString; 1065 } 1066 } 1067 1068 /** 1069 * Set the fragment for this URI. A non-null value is valid only 1070 * if this is a URI conforming to the generic URI syntax and 1071 * the path value is not null. 1072 * 1073 * @param p_fragment the fragment for this URI 1074 * 1075 * @exception MalformedURIException if p_fragment is not null and this 1076 * URI does not conform to the generic 1077 * URI syntax or if the path is null 1078 */ 1079 public void setFragment(String p_fragment) throws MalformedURIException { 1080 if (p_fragment == null) { 1081 m_fragment = null; 1082 } 1083 else if (!isGenericURI()) { 1084 throw new MalformedURIException( 1085 "Fragment can only be set for a generic URI!"); 1086 } 1087 else if (getPath() == null) { 1088 throw new MalformedURIException( 1089 "Fragment cannot be set when path is null!"); 1090 } 1091 else if (!isURIString(p_fragment)) { 1092 throw new MalformedURIException( 1093 "Fragment contains invalid character!"); 1094 } 1095 else { 1096 m_fragment = p_fragment; 1097 } 1098 } 1099 1100 /** 1101 * Determines if the passed-in Object is equivalent to this URI. 1102 * 1103 * @param p_test the Object to test for equality. 1104 * 1105 * @return true if p_test is a URI with all values equal to this 1106 * URI, false otherwise 1107 */ 1108 @Override 1109 public boolean equals(Object p_test) { 1110 if (p_test instanceof JaxmURI) { 1111 JaxmURI testURI = (JaxmURI) p_test; 1112 if (((m_scheme == null && testURI.m_scheme == null) || 1113 (m_scheme != null && testURI.m_scheme != null && 1114 m_scheme.equals(testURI.m_scheme))) && 1115 ((m_userinfo == null && testURI.m_userinfo == null) || 1116 (m_userinfo != null && testURI.m_userinfo != null && 1117 m_userinfo.equals(testURI.m_userinfo))) && 1118 ((m_host == null && testURI.m_host == null) || 1119 (m_host != null && testURI.m_host != null && 1120 m_host.equals(testURI.m_host))) && 1121 m_port == testURI.m_port && 1122 ((m_path == null && testURI.m_path == null) || 1123 (m_path != null && testURI.m_path != null && 1124 m_path.equals(testURI.m_path))) && 1125 ((m_queryString == null && testURI.m_queryString == null) || 1126 (m_queryString != null && testURI.m_queryString != null && 1127 m_queryString.equals(testURI.m_queryString))) && 1128 ((m_fragment == null && testURI.m_fragment == null) || 1129 (m_fragment != null && testURI.m_fragment != null && 1130 m_fragment.equals(testURI.m_fragment)))) { 1131 return true; 1132 } 1133 } 1134 return false; 1135 } 1136 1137 @Override 1138 public int hashCode() { 1139 // No members safe to use, just default to a constant. 1140 return 153214; 1141 } 1142 1143 /** 1144 * Get the URI as a string specification. See RFC 2396 Section 5.2. 1145 * 1146 * @return the URI string specification 1147 */ 1148 @Override 1149 public String toString() { 1150 StringBuilder uriSpecString = new StringBuilder(); 1151 1152 if (m_scheme != null) { 1153 uriSpecString.append(m_scheme); 1154 uriSpecString.append(':'); 1155 } 1156 uriSpecString.append(getSchemeSpecificPart()); 1157 return uriSpecString.toString(); 1158 } 1159 1160 /** 1161 * Get the indicator as to whether this URI uses the "generic URI" 1162 * syntax. 1163 * 1164 * @return true if this URI uses the "generic URI" syntax, false 1165 * otherwise 1166 */ 1167 public boolean isGenericURI() { 1168 // presence of the host (whether valid or empty) means 1169 // double-slashes which means generic uri 1170 return (m_host != null); 1171 } 1172 1173 /** 1174 * Determine whether a scheme conforms to the rules for a scheme name. 1175 * A scheme is conformant if it starts with an alphanumeric, and 1176 * contains only alphanumerics, '+','-' and '.'. 1177 * 1178 * @param p_scheme scheme name 1179 * 1180 * @return true if the scheme is conformant, false otherwise 1181 */ 1182 public static boolean isConformantSchemeName(String p_scheme) { 1183 if (p_scheme == null || p_scheme.trim().length() == 0) { 1184 return false; 1185 } 1186 1187 if (!isAlpha(p_scheme.charAt(0))) { 1188 return false; 1189 } 1190 1191 char testChar; 1192 for (int i = 1; i < p_scheme.length(); i++) { 1193 testChar = p_scheme.charAt(i); 1194 if (!isAlphanum(testChar) && 1195 SCHEME_CHARACTERS.indexOf(testChar) == -1) { 1196 return false; 1197 } 1198 } 1199 1200 return true; 1201 } 1202 1203 /** 1204 * Determine whether a string is syntactically capable of representing 1205 * a valid IPv4 address or the domain name of a network host. A valid 1206 * IPv4 address consists of four decimal digit groups separated by a 1207 * '.'. A hostname consists of domain labels (each of which must 1208 * begin and end with an alphanumeric but may contain '-') separated 1209 * by a '.'. See RFC 2396 Section 3.2.2. 1210 * 1211 * @param p_address address 1212 * 1213 * @return true if the string is a syntactically valid IPv4 address 1214 * or hostname 1215 */ 1216 public static boolean isWellFormedAddress(String p_address) { 1217 if (p_address == null) { 1218 return false; 1219 } 1220 1221 String address = p_address.trim(); 1222 int addrLength = address.length(); 1223 if (addrLength == 0 || addrLength > 255) { 1224 return false; 1225 } 1226 1227 if (address.startsWith(".") || address.startsWith("-")) { 1228 return false; 1229 } 1230 1231 // rightmost domain label starting with digit indicates IP address 1232 // since top level domain label can only start with an alpha 1233 // see RFC 2396 Section 3.2.2 1234 int index = address.lastIndexOf('.'); 1235 if (address.endsWith(".")) { 1236 index = address.substring(0, index).lastIndexOf('.'); 1237 } 1238 1239 if (index+1 < addrLength && isDigit(p_address.charAt(index+1))) { 1240 char testChar; 1241 int numDots = 0; 1242 1243 // make sure that 1) we see only digits and dot separators, 2) that 1244 // any dot separator is preceded and followed by a digit and 1245 // 3) that we find 3 dots 1246 for (int i = 0; i < addrLength; i++) { 1247 testChar = address.charAt(i); 1248 if (testChar == '.') { 1249 if (!isDigit(address.charAt(i-1)) || 1250 (i+1 < addrLength && !isDigit(address.charAt(i+1)))) { 1251 return false; 1252 } 1253 numDots++; 1254 } 1255 else if (!isDigit(testChar)) { 1256 return false; 1257 } 1258 } 1259 if (numDots != 3) { 1260 return false; 1261 } 1262 } 1263 else { 1264 // domain labels can contain alphanumerics and '-" 1265 // but must start and end with an alphanumeric 1266 char testChar; 1267 1268 for (int i = 0; i < addrLength; i++) { 1269 testChar = address.charAt(i); 1270 if (testChar == '.') { 1271 if (!isAlphanum(address.charAt(i-1))) { 1272 return false; 1273 } 1274 if (i+1 < addrLength && !isAlphanum(address.charAt(i+1))) { 1275 return false; 1276 } 1277 } 1278 else if (!isAlphanum(testChar) && testChar != '-') { 1279 return false; 1280 } 1281 } 1282 } 1283 return true; 1284 } 1285 1286 1287 /** 1288 * Determine whether a char is a digit. 1289 * 1290 * @return true if the char is betweeen '0' and '9', false otherwise 1291 */ 1292 private static boolean isDigit(char p_char) { 1293 return p_char >= '0' && p_char <= '9'; 1294 } 1295 1296 /** 1297 * Determine whether a character is a hexadecimal character. 1298 * 1299 * @return true if the char is betweeen '0' and '9', 'a' and 'f' 1300 * or 'A' and 'F', false otherwise 1301 */ 1302 private static boolean isHex(char p_char) { 1303 return (isDigit(p_char) || 1304 (p_char >= 'a' && p_char <= 'f') || 1305 (p_char >= 'A' && p_char <= 'F')); 1306 } 1307 1308 /** 1309 * Determine whether a char is an alphabetic character: a-z or A-Z 1310 * 1311 * @return true if the char is alphabetic, false otherwise 1312 */ 1313 private static boolean isAlpha(char p_char) { 1314 return ((p_char >= 'a' && p_char <= 'z') || 1315 (p_char >= 'A' && p_char <= 'Z' )); 1316 } 1317 1318 /** 1319 * Determine whether a char is an alphanumeric: 0-9, a-z or A-Z 1320 * 1321 * @return true if the char is alphanumeric, false otherwise 1322 */ 1323 private static boolean isAlphanum(char p_char) { 1324 return (isAlpha(p_char) || isDigit(p_char)); 1325 } 1326 1327 /** 1328 * Determine whether a character is a reserved character: 1329 * ';', '/', '?', ':', '@', '&', '=', '+', '$' or ',' 1330 * 1331 * @return true if the string contains any reserved characters 1332 */ 1333 private static boolean isReservedCharacter(char p_char) { 1334 return RESERVED_CHARACTERS.indexOf(p_char) != -1; 1335 } 1336 1337 /** 1338 * Determine whether a char is an unreserved character. 1339 * 1340 * @return true if the char is unreserved, false otherwise 1341 */ 1342 private static boolean isUnreservedCharacter(char p_char) { 1343 return (isAlphanum(p_char) || 1344 MARK_CHARACTERS.indexOf(p_char) != -1); 1345 } 1346 1347 /** 1348 * Determine whether a given string contains only URI characters (also 1349 * called "uric" in RFC 2396). uric consist of all reserved 1350 * characters, unreserved characters and escaped characters. 1351 * 1352 * @return true if the string is comprised of uric, false otherwise 1353 */ 1354 private static boolean isURIString(String p_uric) { 1355 if (p_uric == null) { 1356 return false; 1357 } 1358 int end = p_uric.length(); 1359 char testChar = '\0'; 1360 for (int i = 0; i < end; i++) { 1361 testChar = p_uric.charAt(i); 1362 if (testChar == '%') { 1363 if (i+2 >= end || 1364 !isHex(p_uric.charAt(i+1)) || 1365 !isHex(p_uric.charAt(i+2))) { 1366 return false; 1367 } 1368 else { 1369 i += 2; 1370 continue; 1371 } 1372 } 1373 if (isReservedCharacter(testChar) || 1374 isUnreservedCharacter(testChar)) { 1375 continue; 1376 } 1377 else { 1378 return false; 1379 } 1380 } 1381 return true; 1382 } 1383 }