1 /*
   2  * reserved comment block
   3  * DO NOT REMOVE OR ALTER!
   4  */
   5 /*
   6  * Copyright 2001-2005 The Apache Software Foundation.
   7  *
   8  * Licensed under the Apache License, Version 2.0 (the "License");
   9  * you may not use this file except in compliance with the License.
  10  * You may obtain a copy of the License at
  11  *
  12  *      http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 
  21 package com.sun.org.apache.xerces.internal.impl.dv.xs;
  22 
  23 import com.sun.org.apache.xerces.internal.impl.dv.InvalidDatatypeValueException;
  24 import com.sun.org.apache.xerces.internal.util.URI;
  25 import com.sun.org.apache.xerces.internal.impl.dv.ValidationContext;
  26 
  27 /**
 * Represents the schema type "anyURI".
  29  *
  30  * @xerces.internal
  31  *
  32  * @author Neeraj Bajaj, Sun Microsystems, inc.
  33  * @author Sandy Gao, IBM
  34  *
  35  */
  36 public class AnyURIDV extends TypeValidator {
  37 
  38     private static final URI BASE_URI;
  39     static {
  40         URI uri = null;
  41         try {
  42             uri = new URI("abc://def.ghi.jkl");
  43         } catch (URI.MalformedURIException ex) {
  44         }
  45         BASE_URI = uri;
  46     }
  47 
  48     public short getAllowedFacets(){
  49         return (XSSimpleTypeDecl.FACET_LENGTH | XSSimpleTypeDecl.FACET_MINLENGTH | XSSimpleTypeDecl.FACET_MAXLENGTH | XSSimpleTypeDecl.FACET_PATTERN | XSSimpleTypeDecl.FACET_ENUMERATION | XSSimpleTypeDecl.FACET_WHITESPACE );
  50     }
  51 
  52     // before we return string we have to make sure it is correct URI as per spec.
  53     // for some types (string and derived), they just return the string itself
  54     public Object getActualValue(String content, ValidationContext context) throws InvalidDatatypeValueException {
  55         // check 3.2.17.c0 must: URI (rfc 2396/2723)
  56         try {
  57             if( content.length() != 0 ) {
  58                 // encode special characters using XLink 5.4 algorithm
  59                 final String encoded = encode(content);
  60                 // Support for relative URLs
  61                 // According to Java 1.1: URLs may also be specified with a
  62                 // String and the URL object that it is related to.
  63                 new URI(BASE_URI, encoded );
  64             }
  65         } catch (URI.MalformedURIException ex) {
  66             throw new InvalidDatatypeValueException("cvc-datatype-valid.1.2.1", new Object[]{content, "anyURI"});
  67         }
  68 
  69         // REVISIT: do we need to return the new URI object?
  70         return content;
  71     }
  72 
  73     // which ASCII characters need to be escaped
  74     private static boolean gNeedEscaping[] = new boolean[128];
  75     // the first hex character if a character needs to be escaped
  76     private static char gAfterEscaping1[] = new char[128];
  77     // the second hex character if a character needs to be escaped
  78     private static char gAfterEscaping2[] = new char[128];
  79     private static char[] gHexChs = {'0', '1', '2', '3', '4', '5', '6', '7',
  80                                      '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
  81     // initialize the above 3 arrays
  82     static {
  83         for (int i = 0; i <= 0x1f; i++) {
  84             gNeedEscaping[i] = true;
  85             gAfterEscaping1[i] = gHexChs[i >> 4];
  86             gAfterEscaping2[i] = gHexChs[i & 0xf];
  87         }
  88         gNeedEscaping[0x7f] = true;
  89         gAfterEscaping1[0x7f] = '7';
  90         gAfterEscaping2[0x7f] = 'F';
  91         char[] escChs = {' ', '<', '>', '"', '{', '}',
  92                          '|', '\\', '^', '~', '`'};
  93         int len = escChs.length;
  94         char ch;
  95         for (int i = 0; i < len; i++) {
  96             ch = escChs[i];
  97             gNeedEscaping[ch] = true;
  98             gAfterEscaping1[ch] = gHexChs[ch >> 4];
  99             gAfterEscaping2[ch] = gHexChs[ch & 0xf];
 100         }
 101     }
 102 
 103     // To encode special characters in anyURI, by using %HH to represent
 104     // special ASCII characters: 0x00~0x1F, 0x7F, ' ', '<', '>', etc.
 105     // and non-ASCII characters (whose value >= 128).
 106     private static String encode(String anyURI){
 107         int len = anyURI.length(), ch;
 108         StringBuffer buffer = new StringBuffer(len*3);
 109 
 110         // for each character in the anyURI
 111         int i = 0;
 112         for (; i < len; i++) {
 113             ch = anyURI.charAt(i);
 114             // if it's not an ASCII character, break here, and use UTF-8 encoding
 115             if (ch >= 128)
 116                 break;
 117             if (gNeedEscaping[ch]) {
 118                 buffer.append('%');
 119                 buffer.append(gAfterEscaping1[ch]);
 120                 buffer.append(gAfterEscaping2[ch]);
 121             }
 122             else {
 123                 buffer.append((char)ch);
 124             }
 125         }
 126 
 127         // we saw some non-ascii character
 128         if (i < len) {
 129             // get UTF-8 bytes for the remaining sub-string
 130             byte[] bytes = null;
 131             byte b;
 132             try {
 133                 bytes = anyURI.substring(i).getBytes("UTF-8");
 134             } catch (java.io.UnsupportedEncodingException e) {
 135                 // should never happen
 136                 return anyURI;
 137             }
 138             len = bytes.length;
 139 
 140             // for each byte
 141             for (i = 0; i < len; i++) {
 142                 b = bytes[i];
 143                 // for non-ascii character: make it positive, then escape
 144                 if (b < 0) {
 145                     ch = b + 256;
 146                     buffer.append('%');
 147                     buffer.append(gHexChs[ch >> 4]);
 148                     buffer.append(gHexChs[ch & 0xf]);
 149                 }
 150                 else if (gNeedEscaping[b]) {
 151                     buffer.append('%');
 152                     buffer.append(gAfterEscaping1[b]);
 153                     buffer.append(gAfterEscaping2[b]);
 154                 }
 155                 else {
 156                     buffer.append((char)b);
 157                 }
 158             }
 159         }
 160 
 161         // If encoding happened, create a new string;
 162         // otherwise, return the orginal one.
 163         if (buffer.length() != len) {
 164             return buffer.toString();
 165         }
 166         else {
 167             return anyURI;
 168         }
 169     }
 170 
 171 } // class AnyURIDV