1 /*
2 * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
3 * @LastModified: Oct 2017
4 */
5 /*
6 * Licensed to the Apache Software Foundation (ASF) under one or more
7 * contributor license agreements. See the NOTICE file distributed with
8 * this work for additional information regarding copyright ownership.
9 * The ASF licenses this file to You under the Apache License, Version 2.0
10 * (the "License"); you may not use this file except in compliance with
11 * the License. You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 */
21
22 package com.sun.org.apache.xml.internal.serializer.utils;
23
import java.io.IOException;
import java.util.Locale;
import java.util.Objects;
26
27
28 /**
29 * A class to represent a Uniform Resource Identifier (URI). This class
30 * is designed to handle the parsing of URIs and provide access to
31 * the various components (scheme, host, port, userinfo, path, query
32 * string and fragment) that may constitute a URI.
33 * <p>
34 * Parsing of a URI specification is done according to the URI
35 * syntax described in RFC 2396
36 * <http://www.ietf.org/rfc/rfc2396.txt?number=2396>. Every URI consists
37 * of a scheme, followed by a colon (':'), followed by a scheme-specific
38 * part. For URIs that follow the "generic URI" syntax, the scheme-
39 * specific part begins with two slashes ("//") and may be followed
40 * by an authority segment (comprised of user information, host, and
41 * port), path segment, query segment and fragment. Note that RFC 2396
42 * no longer specifies the use of the parameters segment and excludes
43 * the "user:password" syntax as part of the authority segment. If
44 * "user:password" appears in a URI, the entire user/password string
45 * is stored as userinfo.
46 * <p>
47 * For URIs that do not follow the "generic URI" syntax (e.g. mailto),
48 * the entire scheme-specific part is treated as the "path" portion
49 * of the URI.
50 * <p>
51 * Note that, unlike the java.net.URL class, this class does not provide
52 * any built-in network access functionality nor does it provide any
53 * scheme-specific functionality (for example, it does not know a
54 * default port for a specific scheme). Rather, it only knows the
55 * grammar and basic set of operations that can be applied to a URI.
56 *
57 * This class is a copy of the one in com.sun.org.apache.xml.internal.utils.
 * It exists to cut the serializer's dependency on that package.
59 *
60 * A minor change from the original is that this class no longer implements
61 * Serializable, and the serialVersionUID magic field is dropped, and
62 * the class is no longer "public".
63 *
64 * @xsl.usage internal
65 */
66 final class URI
67 {
68 /**
69 * MalformedURIExceptions are thrown in the process of building a URI
70 * or setting fields on a URI when an operation would result in an
71 * invalid URI specification.
72 *
73 */
74 public static class MalformedURIException extends IOException
75 {
76 private static final long serialVersionUID = 4651455286983598951L;
77
78 /**
79 * Constructs a <code>MalformedURIException</code> with no specified
80 * detail message.
81 */
82 public MalformedURIException()
83 {
84 super();
85 }
86
87 /**
88 * Constructs a <code>MalformedURIException</code> with the
89 * specified detail message.
90 *
91 * @param p_msg the detail message.
92 */
93 public MalformedURIException(String p_msg)
94 {
95 super(p_msg);
96 }
97 }
98
/** The URI reserved characters. */
private static final String RESERVED_CHARACTERS = ";/?:@&=+$,";

/**
 * URI punctuation mark characters - these, combined with
 * alphanumerics, constitute the "unreserved" characters.
 * Note that the set as written here also contains a space character.
 */
private static final String MARK_CHARACTERS = "-_.!~*'() ";

/** A scheme can be composed of alphanumerics and these characters. */
private static final String SCHEME_CHARACTERS = "+-.";

/**
 * Userinfo can be composed of unreserved characters, escape sequences
 * and these characters.
 */
private static final String USERINFO_CHARACTERS = ";:&=+$,";

/** Stores the scheme (usually the protocol) for this URI. */
private String m_scheme = null;

/** If specified, stores the userinfo for this URI; otherwise null. */
private String m_userinfo = null;

/** If specified, stores the host for this URI; otherwise null. */
private String m_host = null;

/** If specified, stores the port for this URI; otherwise -1. */
private int m_port = -1;

/** If specified, stores the path for this URI; otherwise null. */
private String m_path = null;

/**
 * If specified, stores the query string for this URI; otherwise
 * null.
 */
private String m_queryString = null;

/** If specified, stores the fragment for this URI; otherwise null. */
private String m_fragment = null;

/** Indicates whether the class is in DEBUG mode. */
private static boolean DEBUG = false;
150
/**
 * Creates an empty URI: every component keeps its "unspecified" default
 * (null strings, port -1) until a setter is called.
 */
public URI()
{
  super();
}
155
/**
 * Creates a URI that is a component-by-component copy of another URI.
 *
 * @param p_other the URI whose components are copied (must not be null)
 */
public URI(URI p_other)
{
  this.initialize(p_other);
}
166
/**
 * Creates a URI by parsing a URI specification string, without a base
 * URI. When the specification uses the "generic URI" syntax (two slashes
 * after the first colon) it is split into scheme, userinfo, host, port,
 * path, query string and fragment. Otherwise only the scheme and the
 * scheme-specific part (kept as the path) are set.
 *
 * @param p_uriSpec the URI specification string (cannot be null or
 *                  empty)
 *
 * @throws MalformedURIException if p_uriSpec violates any syntax
 *                               rules
 */
public URI(String p_uriSpec) throws MalformedURIException
{
  // the cast selects the (URI, String) overload rather than (String, String)
  this((URI) null, p_uriSpec);
}
186
/**
 * Creates a URI from a URI specification string that may be relative,
 * resolving it against the supplied base URI.
 *
 * @param p_base the base URI (cannot be null if p_uriSpec is null or
 *               empty)
 * @param p_uriSpec the URI specification string (cannot be null or
 *                  empty if p_base is null)
 *
 * @throws MalformedURIException if p_uriSpec violates any syntax
 *                               rules
 */
public URI(URI p_base, String p_uriSpec) throws MalformedURIException
{
  this.initialize(p_base, p_uriSpec);
}
203
204 /**
205 * Construct a new URI that does not follow the generic URI syntax.
206 * Only the scheme and scheme-specific part (stored as the path) are
207 * initialized.
208 *
209 * @param p_scheme the URI scheme (cannot be null or empty)
210 * @param p_schemeSpecificPart the scheme-specific part (cannot be
211 * null or empty)
212 *
213 * @throws MalformedURIException if p_scheme violates any
214 * syntax rules
215 */
216 public URI(String p_scheme, String p_schemeSpecificPart)
217 throws MalformedURIException
218 {
219
220 if (p_scheme == null || p_scheme.trim().length() == 0)
221 {
222 throw new MalformedURIException(
223 "Cannot construct URI with null/empty scheme!");
224 }
225
226 if (p_schemeSpecificPart == null
227 || p_schemeSpecificPart.trim().length() == 0)
228 {
229 throw new MalformedURIException(
230 "Cannot construct URI with null/empty scheme-specific part!");
231 }
232
233 setScheme(p_scheme);
234 setPath(p_schemeSpecificPart);
235 }
236
/**
 * Creates a generic-syntax URI from its component parts, with no
 * userinfo and no port. This delegates to the seven-argument constructor,
 * passing null for the userinfo and -1 for the port.
 *
 * @param p_scheme the URI scheme (cannot be null or empty)
 * @param p_host the hostname or IPv4 address for the URI
 * @param p_path the URI path - if the path contains '?' or '#',
 *               then the query string and/or fragment will be
 *               set from the path; however, if the query and
 *               fragment are specified both in the path and as
 *               separate parameters, an exception is thrown
 * @param p_queryString the URI query string (cannot be specified
 *                      if path is null)
 * @param p_fragment the URI fragment (cannot be specified if path
 *                   is null)
 *
 * @throws MalformedURIException if any of the parameters violates
 *                               syntax rules or semantic rules
 */
public URI(String p_scheme, String p_host, String p_path,
           String p_queryString, String p_fragment)
        throws MalformedURIException
{
  this(p_scheme, null, p_host, -1, p_path, p_queryString, p_fragment);
}
263
264 /**
265 * Construct a new URI that follows the generic URI syntax from its
266 * component parts. Each component is validated for syntax and some
267 * basic semantic checks are performed as well. See the individual
268 * setter methods for specifics.
269 *
270 * @param p_scheme the URI scheme (cannot be null or empty)
271 * @param p_userinfo the URI userinfo (cannot be specified if host
272 * is null)
273 * @param p_host the hostname or IPv4 address for the URI
274 * @param p_port the URI port (may be -1 for "unspecified"; cannot
275 * be specified if host is null)
276 * @param p_path the URI path - if the path contains '?' or '#',
277 * then the query string and/or fragment will be
278 * set from the path; however, if the query and
279 * fragment are specified both in the path and as
280 * separate parameters, an exception is thrown
281 * @param p_queryString the URI query string (cannot be specified
282 * if path is null)
283 * @param p_fragment the URI fragment (cannot be specified if path
284 * is null)
285 *
286 * @throws MalformedURIException if any of the parameters violates
287 * syntax rules or semantic rules
288 */
289 public URI(String p_scheme, String p_userinfo, String p_host, int p_port, String p_path, String p_queryString, String p_fragment)
290 throws MalformedURIException
291 {
292
293 if (p_scheme == null || p_scheme.trim().length() == 0)
294 {
295 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_SCHEME_REQUIRED, null)); //"Scheme is required!");
296 }
297
298 if (p_host == null)
299 {
300 if (p_userinfo != null)
301 {
302 throw new MalformedURIException(
303 Utils.messages.createMessage(MsgKey.ER_NO_USERINFO_IF_NO_HOST, null)); //"Userinfo may not be specified if host is not specified!");
304 }
305
306 if (p_port != -1)
307 {
308 throw new MalformedURIException(
309 Utils.messages.createMessage(MsgKey.ER_NO_PORT_IF_NO_HOST, null)); //"Port may not be specified if host is not specified!");
310 }
311 }
312
313 if (p_path != null)
314 {
315 if (p_path.indexOf('?') != -1 && p_queryString != null)
316 {
317 throw new MalformedURIException(
318 Utils.messages.createMessage(MsgKey.ER_NO_QUERY_STRING_IN_PATH, null)); //"Query string cannot be specified in path and query string!");
319 }
320
321 if (p_path.indexOf('#') != -1 && p_fragment != null)
322 {
323 throw new MalformedURIException(
324 Utils.messages.createMessage(MsgKey.ER_NO_FRAGMENT_STRING_IN_PATH, null)); //"Fragment cannot be specified in both the path and fragment!");
325 }
326 }
327
328 setScheme(p_scheme);
329 setHost(p_host);
330 setPort(p_port);
331 setUserinfo(p_userinfo);
332 setPath(p_path);
333 setQueryString(p_queryString);
334 setFragment(p_fragment);
335 }
336
337 /**
338 * Initialize all fields of this URI from another URI.
339 *
340 * @param p_other the URI to copy (cannot be null)
341 */
342 private void initialize(URI p_other)
343 {
344
345 m_scheme = p_other.getScheme();
346 m_userinfo = p_other.getUserinfo();
347 m_host = p_other.getHost();
348 m_port = p_other.getPort();
349 m_path = p_other.getPath();
350 m_queryString = p_other.getQueryString();
351 m_fragment = p_other.getFragment();
352 }
353
354 /**
355 * Initializes this URI from a base URI and a URI specification string.
356 * See RFC 2396 Section 4 and Appendix B for specifications on parsing
357 * the URI and Section 5 for specifications on resolving relative URIs
358 * and relative paths.
359 *
360 * @param p_base the base URI (may be null if p_uriSpec is an absolute
361 * URI)
362 * @param p_uriSpec the URI spec string which may be an absolute or
363 * relative URI (can only be null/empty if p_base
364 * is not null)
365 *
366 * @throws MalformedURIException if p_base is null and p_uriSpec
367 * is not an absolute URI or if
368 * p_uriSpec violates syntax rules
369 */
370 private void initialize(URI p_base, String p_uriSpec)
371 throws MalformedURIException
372 {
373
374 if (p_base == null
375 && (p_uriSpec == null || p_uriSpec.trim().length() == 0))
376 {
377 throw new MalformedURIException(
378 Utils.messages.createMessage(MsgKey.ER_CANNOT_INIT_URI_EMPTY_PARMS, null)); //"Cannot initialize URI with empty parameters.");
379 }
380
381 // just make a copy of the base if spec is empty
382 if (p_uriSpec == null || p_uriSpec.trim().length() == 0)
383 {
384 initialize(p_base);
385
386 return;
387 }
388
389 String uriSpec = p_uriSpec.trim();
390 int uriSpecLen = uriSpec.length();
391 int index = 0;
392
393 // check for scheme
394 int colonIndex = uriSpec.indexOf(':');
395 if (colonIndex < 0)
396 {
397 if (p_base == null)
398 {
399 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_NO_SCHEME_IN_URI, new Object[]{uriSpec})); //"No scheme found in URI: "+uriSpec);
400 }
401 }
402 else
403 {
404 initializeScheme(uriSpec);
405 uriSpec = uriSpec.substring(colonIndex+1);
406 uriSpecLen = uriSpec.length();
407 }
408
409 // two slashes means generic URI syntax, so we get the authority
410 if (((index + 1) < uriSpecLen)
411 && (uriSpec.substring(index).startsWith("//")))
412 {
413 index += 2;
414
415 int startPos = index;
416
417 // get authority - everything up to path, query or fragment
418 char testChar = '\0';
419
420 while (index < uriSpecLen)
421 {
422 testChar = uriSpec.charAt(index);
423
424 if (testChar == '/' || testChar == '?' || testChar == '#')
425 {
426 break;
427 }
428
429 index++;
430 }
431
432 // if we found authority, parse it out, otherwise we set the
433 // host to empty string
434 if (index > startPos)
435 {
436 initializeAuthority(uriSpec.substring(startPos, index));
437 }
438 else
439 {
440 m_host = "";
441 }
442 }
443
444 initializePath(uriSpec.substring(index));
445
446 // Resolve relative URI to base URI - see RFC 2396 Section 5.2
447 // In some cases, it might make more sense to throw an exception
448 // (when scheme is specified is the string spec and the base URI
449 // is also specified, for example), but we're just following the
450 // RFC specifications
451 if (p_base != null)
452 {
453
454 // check to see if this is the current doc - RFC 2396 5.2 #2
455 // note that this is slightly different from the RFC spec in that
456 // we don't include the check for query string being null
457 // - this handles cases where the urispec is just a query
458 // string or a fragment (e.g. "?y" or "#s") -
459 // see <http://www.ics.uci.edu/~fielding/url/test1.html> which
460 // identified this as a bug in the RFC
461 if (m_path.length() == 0 && m_scheme == null && m_host == null)
462 {
463 m_scheme = p_base.getScheme();
464 m_userinfo = p_base.getUserinfo();
465 m_host = p_base.getHost();
466 m_port = p_base.getPort();
467 m_path = p_base.getPath();
468
469 if (m_queryString == null)
470 {
471 m_queryString = p_base.getQueryString();
472 }
473
474 return;
475 }
476
477 // check for scheme - RFC 2396 5.2 #3
478 // if we found a scheme, it means absolute URI, so we're done
479 if (m_scheme == null)
480 {
481 m_scheme = p_base.getScheme();
482 }
483
484 // check for authority - RFC 2396 5.2 #4
485 // if we found a host, then we've got a network path, so we're done
486 if (m_host == null)
487 {
488 m_userinfo = p_base.getUserinfo();
489 m_host = p_base.getHost();
490 m_port = p_base.getPort();
491 }
492 else
493 {
494 return;
495 }
496
497 // check for absolute path - RFC 2396 5.2 #5
498 if (m_path.length() > 0 && m_path.startsWith("/"))
499 {
500 return;
501 }
502
503 // if we get to this point, we need to resolve relative path
504 // RFC 2396 5.2 #6
505 String path = "";
506 String basePath = p_base.getPath();
507
508 // 6a - get all but the last segment of the base URI path
509 if (basePath != null)
510 {
511 int lastSlash = basePath.lastIndexOf('/');
512
513 if (lastSlash != -1)
514 {
515 path = basePath.substring(0, lastSlash + 1);
516 }
517 }
518
519 // 6b - append the relative URI path
520 path = path.concat(m_path);
521
522 // 6c - remove all "./" where "." is a complete path segment
523 index = -1;
524
525 while ((index = path.indexOf("/./")) != -1)
526 {
527 path = path.substring(0, index + 1).concat(path.substring(index + 3));
528 }
529
530 // 6d - remove "." if path ends with "." as a complete path segment
531 if (path.endsWith("/."))
532 {
533 path = path.substring(0, path.length() - 1);
534 }
535
536 // 6e - remove all "<segment>/../" where "<segment>" is a complete
537 // path segment not equal to ".."
538 index = -1;
539
540 int segIndex = -1;
541 String tempString = null;
542
543 while ((index = path.indexOf("/../")) > 0)
544 {
545 tempString = path.substring(0, path.indexOf("/../"));
546 segIndex = tempString.lastIndexOf('/');
547
548 if (segIndex != -1)
549 {
550 if (!tempString.substring(segIndex++).equals(".."))
551 {
552 path = path.substring(0, segIndex).concat(path.substring(index
553 + 4));
554 }
555 }
556 }
557
558 // 6f - remove ending "<segment>/.." where "<segment>" is a
559 // complete path segment
560 if (path.endsWith("/.."))
561 {
562 tempString = path.substring(0, path.length() - 3);
563 segIndex = tempString.lastIndexOf('/');
564
565 if (segIndex != -1)
566 {
567 path = path.substring(0, segIndex + 1);
568 }
569 }
570
571 m_path = path;
572 }
573 }
574
575 /**
576 * Initialize the scheme for this URI from a URI string spec.
577 *
578 * @param p_uriSpec the URI specification (cannot be null)
579 *
580 * @throws MalformedURIException if URI does not have a conformant
581 * scheme
582 */
583 private void initializeScheme(String p_uriSpec) throws MalformedURIException
584 {
585
586 int uriSpecLen = p_uriSpec.length();
587 int index = 0;
588 String scheme = null;
589 char testChar = '\0';
590
591 while (index < uriSpecLen)
592 {
593 testChar = p_uriSpec.charAt(index);
594
595 if (testChar == ':' || testChar == '/' || testChar == '?'
596 || testChar == '#')
597 {
598 break;
599 }
600
601 index++;
602 }
603
604 scheme = p_uriSpec.substring(0, index);
605
606 if (scheme.length() == 0)
607 {
608 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_NO_SCHEME_INURI, null)); //"No scheme found in URI.");
609 }
610 else
611 {
612 setScheme(scheme);
613 }
614 }
615
616 /**
617 * Initialize the authority (userinfo, host and port) for this
618 * URI from a URI string spec.
619 *
620 * @param p_uriSpec the URI specification (cannot be null)
621 *
622 * @throws MalformedURIException if p_uriSpec violates syntax rules
623 */
624 private void initializeAuthority(String p_uriSpec)
625 throws MalformedURIException
626 {
627
628 int index = 0;
629 int start = 0;
630 int end = p_uriSpec.length();
631 char testChar = '\0';
632 String userinfo = null;
633
634 // userinfo is everything up @
635 if (p_uriSpec.indexOf('@', start) != -1)
636 {
637 while (index < end)
638 {
639 testChar = p_uriSpec.charAt(index);
640
641 if (testChar == '@')
642 {
643 break;
644 }
645
646 index++;
647 }
648
649 userinfo = p_uriSpec.substring(start, index);
650
651 index++;
652 }
653
654 // host is everything up to ':'
655 String host = null;
656
657 start = index;
658
659 while (index < end)
660 {
661 testChar = p_uriSpec.charAt(index);
662
663 if (testChar == ':')
664 {
665 break;
666 }
667
668 index++;
669 }
670
671 host = p_uriSpec.substring(start, index);
672
673 int port = -1;
674
675 if (host.length() > 0)
676 {
677
678 // port
679 if (testChar == ':')
680 {
681 index++;
682
683 start = index;
684
685 while (index < end)
686 {
687 index++;
688 }
689
690 String portStr = p_uriSpec.substring(start, index);
691
692 if (portStr.length() > 0)
693 {
694 for (int i = 0; i < portStr.length(); i++)
695 {
696 if (!isDigit(portStr.charAt(i)))
697 {
698 throw new MalformedURIException(
699 portStr + " is invalid. Port should only contain digits!");
700 }
701 }
702
703 try
704 {
705 port = Integer.parseInt(portStr);
706 }
707 catch (NumberFormatException nfe)
708 {
709
710 // can't happen
711 }
712 }
713 }
714 }
715
716 setHost(host);
717 setPort(port);
718 setUserinfo(userinfo);
719 }
720
721 /**
722 * Initialize the path for this URI from a URI string spec.
723 *
724 * @param p_uriSpec the URI specification (cannot be null)
725 *
726 * @throws MalformedURIException if p_uriSpec violates syntax rules
727 */
728 private void initializePath(String p_uriSpec) throws MalformedURIException
729 {
730
731 if (p_uriSpec == null)
732 {
733 throw new MalformedURIException(
734 "Cannot initialize path from null string!");
735 }
736
737 int index = 0;
738 int start = 0;
739 int end = p_uriSpec.length();
740 char testChar = '\0';
741
742 // path - everything up to query string or fragment
743 while (index < end)
744 {
745 testChar = p_uriSpec.charAt(index);
746
747 if (testChar == '?' || testChar == '#')
748 {
749 break;
750 }
751
752 // check for valid escape sequence
753 if (testChar == '%')
754 {
755 if (index + 2 >= end ||!isHex(p_uriSpec.charAt(index + 1))
756 ||!isHex(p_uriSpec.charAt(index + 2)))
757 {
758 throw new MalformedURIException(
759 Utils.messages.createMessage(MsgKey.ER_PATH_CONTAINS_INVALID_ESCAPE_SEQUENCE, null)); //"Path contains invalid escape sequence!");
760 }
761 }
762 else if (!isReservedCharacter(testChar)
763 &&!isUnreservedCharacter(testChar))
764 {
765 if ('\\' != testChar)
766 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_PATH_INVALID_CHAR, new Object[]{String.valueOf(testChar)})); //"Path contains invalid character: "
767 //+ testChar);
768 }
769
770 index++;
771 }
772
773 m_path = p_uriSpec.substring(start, index);
774
775 // query - starts with ? and up to fragment or end
776 if (testChar == '?')
777 {
778 index++;
779
780 start = index;
781
782 while (index < end)
783 {
784 testChar = p_uriSpec.charAt(index);
785
786 if (testChar == '#')
787 {
788 break;
789 }
790
791 if (testChar == '%')
792 {
793 if (index + 2 >= end ||!isHex(p_uriSpec.charAt(index + 1))
794 ||!isHex(p_uriSpec.charAt(index + 2)))
795 {
796 throw new MalformedURIException(
797 "Query string contains invalid escape sequence!");
798 }
799 }
800 else if (!isReservedCharacter(testChar)
801 &&!isUnreservedCharacter(testChar))
802 {
803 throw new MalformedURIException(
804 "Query string contains invalid character:" + testChar);
805 }
806
807 index++;
808 }
809
810 m_queryString = p_uriSpec.substring(start, index);
811 }
812
813 // fragment - starts with #
814 if (testChar == '#')
815 {
816 index++;
817
818 start = index;
819
820 while (index < end)
821 {
822 testChar = p_uriSpec.charAt(index);
823
824 if (testChar == '%')
825 {
826 if (index + 2 >= end ||!isHex(p_uriSpec.charAt(index + 1))
827 ||!isHex(p_uriSpec.charAt(index + 2)))
828 {
829 throw new MalformedURIException(
830 "Fragment contains invalid escape sequence!");
831 }
832 }
833 else if (!isReservedCharacter(testChar)
834 &&!isUnreservedCharacter(testChar))
835 {
836 throw new MalformedURIException(
837 "Fragment contains invalid character:" + testChar);
838 }
839
840 index++;
841 }
842
843 m_fragment = p_uriSpec.substring(start, index);
844 }
845 }
846
847 /**
848 * Get the scheme for this URI.
849 *
850 * @return the scheme for this URI
851 */
852 public String getScheme()
853 {
854 return m_scheme;
855 }
856
857 /**
858 * Get the scheme-specific part for this URI (everything following the
859 * scheme and the first colon). See RFC 2396 Section 5.2 for spec.
860 *
861 * @return the scheme-specific part for this URI
862 */
863 public String getSchemeSpecificPart()
864 {
865
866 final StringBuilder schemespec = new StringBuilder();
867
868 if (m_userinfo != null || m_host != null || m_port != -1)
869 {
870 schemespec.append("//");
871 }
872
873 if (m_userinfo != null)
874 {
875 schemespec.append(m_userinfo);
876 schemespec.append('@');
877 }
878
879 if (m_host != null)
880 {
881 schemespec.append(m_host);
882 }
883
884 if (m_port != -1)
885 {
886 schemespec.append(':');
887 schemespec.append(m_port);
888 }
889
890 if (m_path != null)
891 {
892 schemespec.append((m_path));
893 }
894
895 if (m_queryString != null)
896 {
897 schemespec.append('?');
898 schemespec.append(m_queryString);
899 }
900
901 if (m_fragment != null)
902 {
903 schemespec.append('#');
904 schemespec.append(m_fragment);
905 }
906
907 return schemespec.toString();
908 }
909
910 /**
911 * Get the userinfo for this URI.
912 *
913 * @return the userinfo for this URI (null if not specified).
914 */
915 public String getUserinfo()
916 {
917 return m_userinfo;
918 }
919
920 /**
921 * Get the host for this URI.
922 *
923 * @return the host for this URI (null if not specified).
924 */
925 public String getHost()
926 {
927 return m_host;
928 }
929
930 /**
931 * Get the port for this URI.
932 *
933 * @return the port for this URI (-1 if not specified).
934 */
935 public int getPort()
936 {
937 return m_port;
938 }
939
940 /**
941 * Get the path for this URI (optionally with the query string and
942 * fragment).
943 *
944 * @param p_includeQueryString if true (and query string is not null),
945 * then a "?" followed by the query string
946 * will be appended
947 * @param p_includeFragment if true (and fragment is not null),
948 * then a "#" followed by the fragment
949 * will be appended
950 *
951 * @return the path for this URI possibly including the query string
952 * and fragment
953 */
954 public String getPath(boolean p_includeQueryString,
955 boolean p_includeFragment)
956 {
957
958 final StringBuilder pathString = new StringBuilder(m_path);
959
960 if (p_includeQueryString && m_queryString != null)
961 {
962 pathString.append('?');
963 pathString.append(m_queryString);
964 }
965
966 if (p_includeFragment && m_fragment != null)
967 {
968 pathString.append('#');
969 pathString.append(m_fragment);
970 }
971
972 return pathString.toString();
973 }
974
975 /**
976 * Get the path for this URI. Note that the value returned is the path
977 * only and does not include the query string or fragment.
978 *
979 * @return the path for this URI.
980 */
981 public String getPath()
982 {
983 return m_path;
984 }
985
986 /**
987 * Get the query string for this URI.
988 *
989 * @return the query string for this URI. Null is returned if there
990 * was no "?" in the URI spec, empty string if there was a
991 * "?" but no query string following it.
992 */
993 public String getQueryString()
994 {
995 return m_queryString;
996 }
997
998 /**
999 * Get the fragment for this URI.
1000 *
1001 * @return the fragment for this URI. Null is returned if there
1002 * was no "#" in the URI spec, empty string if there was a
1003 * "#" but no fragment following it.
1004 */
1005 public String getFragment()
1006 {
1007 return m_fragment;
1008 }
1009
1010 /**
1011 * Set the scheme for this URI. The scheme is converted to lowercase
1012 * before it is set.
1013 *
1014 * @param p_scheme the scheme for this URI (cannot be null)
1015 *
1016 * @throws MalformedURIException if p_scheme is not a conformant
1017 * scheme name
1018 */
1019 public void setScheme(String p_scheme) throws MalformedURIException
1020 {
1021
1022 if (p_scheme == null)
1023 {
1024 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_SCHEME_FROM_NULL_STRING, null)); //"Cannot set scheme from null string!");
1025 }
1026
1027 if (!isConformantSchemeName(p_scheme))
1028 {
1029 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_SCHEME_NOT_CONFORMANT, null)); //"The scheme is not conformant.");
1030 }
1031
1032 m_scheme = p_scheme.toLowerCase();
1033 }
1034
1035 /**
1036 * Set the userinfo for this URI. If a non-null value is passed in and
1037 * the host value is null, then an exception is thrown.
1038 *
1039 * @param p_userinfo the userinfo for this URI
1040 *
1041 * @throws MalformedURIException if p_userinfo contains invalid
1042 * characters
1043 */
1044 public void setUserinfo(String p_userinfo) throws MalformedURIException
1045 {
1046
1047 if (p_userinfo == null)
1048 {
1049 m_userinfo = null;
1050 }
1051 else
1052 {
1053 if (m_host == null)
1054 {
1055 throw new MalformedURIException(
1056 "Userinfo cannot be set when host is null!");
1057 }
1058
1059 // userinfo can contain alphanumerics, mark characters, escaped
1060 // and ';',':','&','=','+','$',','
1061 int index = 0;
1062 int end = p_userinfo.length();
1063 char testChar = '\0';
1064
1065 while (index < end)
1066 {
1067 testChar = p_userinfo.charAt(index);
1068
1069 if (testChar == '%')
1070 {
1071 if (index + 2 >= end ||!isHex(p_userinfo.charAt(index + 1))
1072 ||!isHex(p_userinfo.charAt(index + 2)))
1073 {
1074 throw new MalformedURIException(
1075 "Userinfo contains invalid escape sequence!");
1076 }
1077 }
1078 else if (!isUnreservedCharacter(testChar)
1079 && USERINFO_CHARACTERS.indexOf(testChar) == -1)
1080 {
1081 throw new MalformedURIException(
1082 "Userinfo contains invalid character:" + testChar);
1083 }
1084
1085 index++;
1086 }
1087 }
1088
1089 m_userinfo = p_userinfo;
1090 }
1091
1092 /**
1093 * Set the host for this URI. If null is passed in, the userinfo
1094 * field is also set to null and the port is set to -1.
1095 *
1096 * @param p_host the host for this URI
1097 *
1098 * @throws MalformedURIException if p_host is not a valid IP
1099 * address or DNS hostname.
1100 */
1101 public void setHost(String p_host) throws MalformedURIException
1102 {
1103
1104 if (p_host == null || p_host.trim().length() == 0)
1105 {
1106 m_host = p_host;
1107 m_userinfo = null;
1108 m_port = -1;
1109 }
1110 else if (!isWellFormedAddress(p_host))
1111 {
1112 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_HOST_ADDRESS_NOT_WELLFORMED, null)); //"Host is not a well formed address!");
1113 }
1114
1115 m_host = p_host;
1116 }
1117
1118 /**
1119 * Set the port for this URI. -1 is used to indicate that the port is
1120 * not specified, otherwise valid port numbers are between 0 and 65535.
1121 * If a valid port number is passed in and the host field is null,
1122 * an exception is thrown.
1123 *
1124 * @param p_port the port number for this URI
1125 *
1126 * @throws MalformedURIException if p_port is not -1 and not a
1127 * valid port number
1128 */
1129 public void setPort(int p_port) throws MalformedURIException
1130 {
1131
1132 if (p_port >= 0 && p_port <= 65535)
1133 {
1134 if (m_host == null)
1135 {
1136 throw new MalformedURIException(
1137 Utils.messages.createMessage(MsgKey.ER_PORT_WHEN_HOST_NULL, null)); //"Port cannot be set when host is null!");
1138 }
1139 }
1140 else if (p_port != -1)
1141 {
1142 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_INVALID_PORT, null)); //"Invalid port number!");
1143 }
1144
1145 m_port = p_port;
1146 }
1147
1148 /**
1149 * Set the path for this URI. If the supplied path is null, then the
1150 * query string and fragment are set to null as well. If the supplied
1151 * path includes a query string and/or fragment, these fields will be
1152 * parsed and set as well. Note that, for URIs following the "generic
1153 * URI" syntax, the path specified should start with a slash.
1154 * For URIs that do not follow the generic URI syntax, this method
1155 * sets the scheme-specific part.
1156 *
1157 * @param p_path the path for this URI (may be null)
1158 *
1159 * @throws MalformedURIException if p_path contains invalid
1160 * characters
1161 */
1162 public void setPath(String p_path) throws MalformedURIException
1163 {
1164
1165 if (p_path == null)
1166 {
1167 m_path = null;
1168 m_queryString = null;
1169 m_fragment = null;
1170 }
1171 else
1172 {
1173 initializePath(p_path);
1174 }
1175 }
1176
1177 /**
1178 * Append to the end of the path of this URI. If the current path does
1179 * not end in a slash and the path to be appended does not begin with
1180 * a slash, a slash will be appended to the current path before the
1181 * new segment is added. Also, if the current path ends in a slash
1182 * and the new segment begins with a slash, the extra slash will be
1183 * removed before the new segment is appended.
1184 *
1185 * @param p_addToPath the new segment to be added to the current path
1186 *
1187 * @throws MalformedURIException if p_addToPath contains syntax
1188 * errors
1189 */
1190 public void appendPath(String p_addToPath) throws MalformedURIException
1191 {
1192
1193 if (p_addToPath == null || p_addToPath.trim().length() == 0)
1194 {
1195 return;
1196 }
1197
1198 if (!isURIString(p_addToPath))
1199 {
1200 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_PATH_INVALID_CHAR, new Object[]{p_addToPath})); //"Path contains invalid character!");
1201 }
1202
1203 if (m_path == null || m_path.trim().length() == 0)
1204 {
1205 if (p_addToPath.startsWith("/"))
1206 {
1207 m_path = p_addToPath;
1208 }
1209 else
1210 {
1211 m_path = "/" + p_addToPath;
1212 }
1213 }
1214 else if (m_path.endsWith("/"))
1215 {
1216 if (p_addToPath.startsWith("/"))
1217 {
1218 m_path = m_path.concat(p_addToPath.substring(1));
1219 }
1220 else
1221 {
1222 m_path = m_path.concat(p_addToPath);
1223 }
1224 }
1225 else
1226 {
1227 if (p_addToPath.startsWith("/"))
1228 {
1229 m_path = m_path.concat(p_addToPath);
1230 }
1231 else
1232 {
1233 m_path = m_path.concat("/" + p_addToPath);
1234 }
1235 }
1236 }
1237
1238 /**
1239 * Set the query string for this URI. A non-null value is valid only
1240 * if this is an URI conforming to the generic URI syntax and
1241 * the path value is not null.
1242 *
1243 * @param p_queryString the query string for this URI
1244 *
1245 * @throws MalformedURIException if p_queryString is not null and this
1246 * URI does not conform to the generic
1247 * URI syntax or if the path is null
1248 */
1249 public void setQueryString(String p_queryString)
1250 throws MalformedURIException
1251 {
1252
1253 if (p_queryString == null)
1254 {
1255 m_queryString = null;
1256 }
1257 else if (!isGenericURI())
1258 {
1259 throw new MalformedURIException(
1260 "Query string can only be set for a generic URI!");
1261 }
1262 else if (getPath() == null)
1263 {
1264 throw new MalformedURIException(
1265 "Query string cannot be set when path is null!");
1266 }
1267 else if (!isURIString(p_queryString))
1268 {
1269 throw new MalformedURIException(
1270 "Query string contains invalid character!");
1271 }
1272 else
1273 {
1274 m_queryString = p_queryString;
1275 }
1276 }
1277
1278 /**
1279 * Set the fragment for this URI. A non-null value is valid only
1280 * if this is a URI conforming to the generic URI syntax and
1281 * the path value is not null.
1282 *
1283 * @param p_fragment the fragment for this URI
1284 *
1285 * @throws MalformedURIException if p_fragment is not null and this
1286 * URI does not conform to the generic
1287 * URI syntax or if the path is null
1288 */
1289 public void setFragment(String p_fragment) throws MalformedURIException
1290 {
1291
1292 if (p_fragment == null)
1293 {
1294 m_fragment = null;
1295 }
1296 else if (!isGenericURI())
1297 {
1298 throw new MalformedURIException(
1299 Utils.messages.createMessage(MsgKey.ER_FRAG_FOR_GENERIC_URI, null)); //"Fragment can only be set for a generic URI!");
1300 }
1301 else if (getPath() == null)
1302 {
1303 throw new MalformedURIException(
1304 Utils.messages.createMessage(MsgKey.ER_FRAG_WHEN_PATH_NULL, null)); //"Fragment cannot be set when path is null!");
1305 }
1306 else if (!isURIString(p_fragment))
1307 {
1308 throw new MalformedURIException(Utils.messages.createMessage(MsgKey.ER_FRAG_INVALID_CHAR, null)); //"Fragment contains invalid character!");
1309 }
1310 else
1311 {
1312 m_fragment = p_fragment;
1313 }
1314 }
1315
1316 /**
1317 * Determines if the passed-in Object is equivalent to this URI.
1318 *
1319 * @param p_test the Object to test for equality.
1320 *
1321 * @return true if p_test is a URI with all values equal to this
1322 * URI, false otherwise
1323 */
1324 @Override
1325 public boolean equals(Object p_test)
1326 {
1327
1328 if (p_test instanceof URI)
1329 {
1330 URI testURI = (URI) p_test;
1331
1332 if (((m_scheme == null && testURI.m_scheme == null) || (m_scheme != null && testURI.m_scheme != null && m_scheme.equals(
1333 testURI.m_scheme))) && ((m_userinfo == null && testURI.m_userinfo == null) || (m_userinfo != null && testURI.m_userinfo != null && m_userinfo.equals(
1334 testURI.m_userinfo))) && ((m_host == null && testURI.m_host == null) || (m_host != null && testURI.m_host != null && m_host.equals(
1335 testURI.m_host))) && m_port == testURI.m_port && ((m_path == null && testURI.m_path == null) || (m_path != null && testURI.m_path != null && m_path.equals(
1336 testURI.m_path))) && ((m_queryString == null && testURI.m_queryString == null) || (m_queryString != null && testURI.m_queryString != null && m_queryString.equals(
1337 testURI.m_queryString))) && ((m_fragment == null && testURI.m_fragment == null) || (m_fragment != null && testURI.m_fragment != null && m_fragment.equals(
1338 testURI.m_fragment))))
1339 {
1340 return true;
1341 }
1342 }
1343
1344 return false;
1345 }
1346
1347 @Override
1348 public int hashCode() {
1349 int hash = 5;
1350 hash = 41 * hash + Objects.hashCode(this.m_scheme);
1351 hash = 41 * hash + Objects.hashCode(this.m_userinfo);
1352 hash = 41 * hash + Objects.hashCode(this.m_host);
1353 hash = 41 * hash + this.m_port;
1354 hash = 41 * hash + Objects.hashCode(this.m_path);
1355 hash = 41 * hash + Objects.hashCode(this.m_queryString);
1356 hash = 41 * hash + Objects.hashCode(this.m_fragment);
1357 return hash;
1358 }
1359
1360 /**
1361 * Get the URI as a string specification. See RFC 2396 Section 5.2.
1362 *
1363 * @return the URI string specification
1364 */
1365 @Override
1366 public String toString()
1367 {
1368
1369 final StringBuilder uriSpecString = new StringBuilder();
1370
1371 if (m_scheme != null)
1372 {
1373 uriSpecString.append(m_scheme);
1374 uriSpecString.append(':');
1375 }
1376
1377 uriSpecString.append(getSchemeSpecificPart());
1378
1379 return uriSpecString.toString();
1380 }
1381
1382 /**
1383 * Get the indicator as to whether this URI uses the "generic URI"
1384 * syntax.
1385 *
1386 * @return true if this URI uses the "generic URI" syntax, false
1387 * otherwise
1388 */
1389 public boolean isGenericURI()
1390 {
1391
1392 // presence of the host (whether valid or empty) means
1393 // double-slashes which means generic uri
1394 return (m_host != null);
1395 }
1396
1397 /**
1398 * Determine whether a scheme conforms to the rules for a scheme name.
1399 * A scheme is conformant if it starts with an alphanumeric, and
1400 * contains only alphanumerics, '+','-' and '.'.
1401 *
1402 *
1403 * @param p_scheme The sheme name to check
1404 * @return true if the scheme is conformant, false otherwise
1405 */
1406 public static boolean isConformantSchemeName(String p_scheme)
1407 {
1408
1409 if (p_scheme == null || p_scheme.trim().length() == 0)
1410 {
1411 return false;
1412 }
1413
1414 if (!isAlpha(p_scheme.charAt(0)))
1415 {
1416 return false;
1417 }
1418
1419 char testChar;
1420
1421 for (int i = 1; i < p_scheme.length(); i++)
1422 {
1423 testChar = p_scheme.charAt(i);
1424
1425 if (!isAlphanum(testChar) && SCHEME_CHARACTERS.indexOf(testChar) == -1)
1426 {
1427 return false;
1428 }
1429 }
1430
1431 return true;
1432 }
1433
1434 /**
1435 * Determine whether a string is syntactically capable of representing
1436 * a valid IPv4 address or the domain name of a network host. A valid
1437 * IPv4 address consists of four decimal digit groups separated by a
1438 * '.'. A hostname consists of domain labels (each of which must
1439 * begin and end with an alphanumeric but may contain '-') separated
1440 * & by a '.'. See RFC 2396 Section 3.2.2.
1441 *
1442 *
1443 * @param p_address The address string to check
1444 * @return true if the string is a syntactically valid IPv4 address
1445 * or hostname
1446 */
1447 public static boolean isWellFormedAddress(String p_address)
1448 {
1449
1450 if (p_address == null)
1451 {
1452 return false;
1453 }
1454
1455 String address = p_address.trim();
1456 int addrLength = address.length();
1457
1458 if (addrLength == 0 || addrLength > 255)
1459 {
1460 return false;
1461 }
1462
1463 if (address.startsWith(".") || address.startsWith("-"))
1464 {
1465 return false;
1466 }
1467
1468 // rightmost domain label starting with digit indicates IP address
1469 // since top level domain label can only start with an alpha
1470 // see RFC 2396 Section 3.2.2
1471 int index = address.lastIndexOf('.');
1472
1473 if (address.endsWith("."))
1474 {
1475 index = address.substring(0, index).lastIndexOf('.');
1476 }
1477
1478 if (index + 1 < addrLength && isDigit(p_address.charAt(index + 1)))
1479 {
1480 char testChar;
1481 int numDots = 0;
1482
1483 // make sure that 1) we see only digits and dot separators, 2) that
1484 // any dot separator is preceded and followed by a digit and
1485 // 3) that we find 3 dots
1486 for (int i = 0; i < addrLength; i++)
1487 {
1488 testChar = address.charAt(i);
1489
1490 if (testChar == '.')
1491 {
1492 if (!isDigit(address.charAt(i - 1))
1493 || (i + 1 < addrLength &&!isDigit(address.charAt(i + 1))))
1494 {
1495 return false;
1496 }
1497
1498 numDots++;
1499 }
1500 else if (!isDigit(testChar))
1501 {
1502 return false;
1503 }
1504 }
1505
1506 if (numDots != 3)
1507 {
1508 return false;
1509 }
1510 }
1511 else
1512 {
1513
1514 // domain labels can contain alphanumerics and '-"
1515 // but must start and end with an alphanumeric
1516 char testChar;
1517
1518 for (int i = 0; i < addrLength; i++)
1519 {
1520 testChar = address.charAt(i);
1521
1522 if (testChar == '.')
1523 {
1524 if (!isAlphanum(address.charAt(i - 1)))
1525 {
1526 return false;
1527 }
1528
1529 if (i + 1 < addrLength &&!isAlphanum(address.charAt(i + 1)))
1530 {
1531 return false;
1532 }
1533 }
1534 else if (!isAlphanum(testChar) && testChar != '-')
1535 {
1536 return false;
1537 }
1538 }
1539 }
1540
1541 return true;
1542 }
1543
1544 /**
1545 * Determine whether a char is a digit.
1546 *
1547 *
1548 * @param p_char the character to check
1549 * @return true if the char is betweeen '0' and '9', false otherwise
1550 */
1551 private static boolean isDigit(char p_char)
1552 {
1553 return p_char >= '0' && p_char <= '9';
1554 }
1555
1556 /**
1557 * Determine whether a character is a hexadecimal character.
1558 *
1559 *
1560 * @param p_char the character to check
1561 * @return true if the char is between '0' and '9', 'a' and 'f'
1562 * or 'A' and 'F', false otherwise
1563 */
1564 private static boolean isHex(char p_char)
1565 {
1566 return (isDigit(p_char) || (p_char >= 'a' && p_char <= 'f')
1567 || (p_char >= 'A' && p_char <= 'F'));
1568 }
1569
1570 /**
1571 * Determine whether a char is an alphabetic character: a-z or A-Z
1572 *
1573 *
1574 * @param p_char the character to check
1575 * @return true if the char is alphabetic, false otherwise
1576 */
1577 private static boolean isAlpha(char p_char)
1578 {
1579 return ((p_char >= 'a' && p_char <= 'z')
1580 || (p_char >= 'A' && p_char <= 'Z'));
1581 }
1582
1583 /**
1584 * Determine whether a char is an alphanumeric: 0-9, a-z or A-Z
1585 *
1586 *
1587 * @param p_char the character to check
1588 * @return true if the char is alphanumeric, false otherwise
1589 */
1590 private static boolean isAlphanum(char p_char)
1591 {
1592 return (isAlpha(p_char) || isDigit(p_char));
1593 }
1594
1595 /**
1596 * Determine whether a character is a reserved character:
1597 * ';', '/', '?', ':', '@', '&', '=', '+', '$' or ','
1598 *
1599 *
1600 * @param p_char the character to check
1601 * @return true if the string contains any reserved characters
1602 */
1603 private static boolean isReservedCharacter(char p_char)
1604 {
1605 return RESERVED_CHARACTERS.indexOf(p_char) != -1;
1606 }
1607
1608 /**
1609 * Determine whether a char is an unreserved character.
1610 *
1611 *
1612 * @param p_char the character to check
1613 * @return true if the char is unreserved, false otherwise
1614 */
1615 private static boolean isUnreservedCharacter(char p_char)
1616 {
1617 return (isAlphanum(p_char) || MARK_CHARACTERS.indexOf(p_char) != -1);
1618 }
1619
1620 /**
1621 * Determine whether a given string contains only URI characters (also
1622 * called "uric" in RFC 2396). uric consist of all reserved
1623 * characters, unreserved characters and escaped characters.
1624 *
1625 *
1626 * @param p_uric URI string
1627 * @return true if the string is comprised of uric, false otherwise
1628 */
1629 private static boolean isURIString(String p_uric)
1630 {
1631
1632 if (p_uric == null)
1633 {
1634 return false;
1635 }
1636
1637 int end = p_uric.length();
1638 char testChar = '\0';
1639
1640 for (int i = 0; i < end; i++)
1641 {
1642 testChar = p_uric.charAt(i);
1643
1644 if (testChar == '%')
1645 {
1646 if (i + 2 >= end ||!isHex(p_uric.charAt(i + 1))
1647 ||!isHex(p_uric.charAt(i + 2)))
1648 {
1649 return false;
1650 }
1651 else
1652 {
1653 i += 2;
1654
1655 continue;
1656 }
1657 }
1658
1659 if (isReservedCharacter(testChar) || isUnreservedCharacter(testChar))
1660 {
1661 continue;
1662 }
1663 else
1664 {
1665 return false;
1666 }
1667 }
1668
1669 return true;
1670 }
1671 }
--- EOF ---