1 /* 2 * reserved comment block 3 * DO NOT REMOVE OR ALTER! 4 */ 5 /* 6 * Copyright 2001-2005 The Apache Software Foundation. 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xerces.internal.impl.dv.xs; 22 23 import com.sun.org.apache.xerces.internal.impl.dv.InvalidDatatypeValueException; 24 import com.sun.org.apache.xerces.internal.util.URI; 25 import com.sun.org.apache.xerces.internal.impl.dv.ValidationContext; 26 27 /** 28 * Represent the schema type "anyURI" 29 * 30 * @xerces.internal 31 * 32 * @author Neeraj Bajaj, Sun Microsystems, inc. 33 * @author Sandy Gao, IBM 34 * 35 */ 36 public class AnyURIDV extends TypeValidator { 37 38 private static final URI BASE_URI; 39 static { 40 URI uri = null; 41 try { 42 uri = new URI("abc://def.ghi.jkl"); 43 } catch (URI.MalformedURIException ex) { 44 } 45 BASE_URI = uri; 46 } 47 48 public short getAllowedFacets(){ 49 return (XSSimpleTypeDecl.FACET_LENGTH | XSSimpleTypeDecl.FACET_MINLENGTH | XSSimpleTypeDecl.FACET_MAXLENGTH | XSSimpleTypeDecl.FACET_PATTERN | XSSimpleTypeDecl.FACET_ENUMERATION | XSSimpleTypeDecl.FACET_WHITESPACE ); 50 } 51 52 // before we return string we have to make sure it is correct URI as per spec. 53 // for some types (string and derived), they just return the string itself 54 public Object getActualValue(String content, ValidationContext context) throws InvalidDatatypeValueException { 55 // check 3.2.17.c0 must: URI (rfc 2396/2723) 56 try { 57 if( content.length() != 0 ) { 58 // encode special characters using XLink 5.4 algorithm 59 final String encoded = encode(content); 60 // Support for relative URLs 61 // According to Java 1.1: URLs may also be specified with a 62 // String and the URL object that it is related to. 63 new URI(BASE_URI, encoded ); 64 } 65 } catch (URI.MalformedURIException ex) { 66 throw new InvalidDatatypeValueException("cvc-datatype-valid.1.2.1", new Object[]{content, "anyURI"}); 67 } 68 69 // REVISIT: do we need to return the new URI object? 70 return content; 71 } 72 73 // which ASCII characters need to be escaped 74 private static boolean gNeedEscaping[] = new boolean[128]; 75 // the first hex character if a character needs to be escaped 76 private static char gAfterEscaping1[] = new char[128]; 77 // the second hex character if a character needs to be escaped 78 private static char gAfterEscaping2[] = new char[128]; 79 private static char[] gHexChs = {'0', '1', '2', '3', '4', '5', '6', '7', 80 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'}; 81 // initialize the above 3 arrays 82 static { 83 for (int i = 0; i <= 0x1f; i++) { 84 gNeedEscaping[i] = true; 85 gAfterEscaping1[i] = gHexChs[i >> 4]; 86 gAfterEscaping2[i] = gHexChs[i & 0xf]; 87 } 88 gNeedEscaping[0x7f] = true; 89 gAfterEscaping1[0x7f] = '7'; 90 gAfterEscaping2[0x7f] = 'F'; 91 char[] escChs = {' ', '<', '>', '"', '{', '}', 92 '|', '\\', '^', '~', '`'}; 93 int len = escChs.length; 94 char ch; 95 for (int i = 0; i < len; i++) { 96 ch = escChs[i]; 97 gNeedEscaping[ch] = true; 98 gAfterEscaping1[ch] = gHexChs[ch >> 4]; 99 gAfterEscaping2[ch] = gHexChs[ch & 0xf]; 100 } 101 } 102 103 // To encode special characters in anyURI, by using %HH to represent 104 // special ASCII characters: 0x00~0x1F, 0x7F, ' ', '<', '>', etc. 105 // and non-ASCII characters (whose value >= 128). 106 private static String encode(String anyURI){ 107 int len = anyURI.length(), ch; 108 StringBuffer buffer = new StringBuffer(len*3); 109 110 // for each character in the anyURI 111 int i = 0; 112 for (; i < len; i++) { 113 ch = anyURI.charAt(i); 114 // if it's not an ASCII character, break here, and use UTF-8 encoding 115 if (ch >= 128) 116 break; 117 if (gNeedEscaping[ch]) { 118 buffer.append('%'); 119 buffer.append(gAfterEscaping1[ch]); 120 buffer.append(gAfterEscaping2[ch]); 121 } 122 else { 123 buffer.append((char)ch); 124 } 125 } 126 127 // we saw some non-ascii character 128 if (i < len) { 129 // get UTF-8 bytes for the remaining sub-string 130 byte[] bytes = null; 131 byte b; 132 try { 133 bytes = anyURI.substring(i).getBytes("UTF-8"); 134 } catch (java.io.UnsupportedEncodingException e) { 135 // should never happen 136 return anyURI; 137 } 138 len = bytes.length; 139 140 // for each byte 141 for (i = 0; i < len; i++) { 142 b = bytes[i]; 143 // for non-ascii character: make it positive, then escape 144 if (b < 0) { 145 ch = b + 256; 146 buffer.append('%'); 147 buffer.append(gHexChs[ch >> 4]); 148 buffer.append(gHexChs[ch & 0xf]); 149 } 150 else if (gNeedEscaping[b]) { 151 buffer.append('%'); 152 buffer.append(gAfterEscaping1[b]); 153 buffer.append(gAfterEscaping2[b]); 154 } 155 else { 156 buffer.append((char)b); 157 } 158 } 159 } 160 161 // If encoding happened, create a new string; 162 // otherwise, return the orginal one. 163 if (buffer.length() != len) { 164 return buffer.toString(); 165 } 166 else { 167 return anyURI; 168 } 169 } 170 171 } // class AnyURIDV