1 /* 2 * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. 3 */ 4 /* 5 * Licensed to the Apache Software Foundation (ASF) under one or more 6 * contributor license agreements. See the NOTICE file distributed with 7 * this work for additional information regarding copyright ownership. 8 * The ASF licenses this file to You under the Apache License, Version 2.0 9 * (the "License"); you may not use this file except in compliance with 10 * the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xerces.internal.impl; 22 23 import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 24 import com.sun.org.apache.xerces.internal.util.XML11Char; 25 import com.sun.org.apache.xerces.internal.util.XMLChar; 26 import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 27 import com.sun.org.apache.xerces.internal.xni.XMLString; 28 import com.sun.org.apache.xerces.internal.xni.XNIException; 29 import java.io.IOException; 30 31 /** 32 * This class is responsible for scanning XML document structure 33 * and content. The scanner acts as the source for the document 34 * information which is communicated to the document handler. 35 * <p> 36 * This component requires the following features and properties from the 37 * component manager that uses it: 38 * <ul> 39 * <li>http://xml.org/sax/features/namespaces</li> 40 * <li>http://xml.org/sax/features/validation</li> 41 * <li>http://apache.org/xml/features/nonvalidating/load-external-dtd</li> 42 * <li>http://apache.org/xml/features/scanner/notify-char-refs</li> 43 * <li>http://apache.org/xml/features/scanner/notify-builtin-refs</li> 44 * <li>http://apache.org/xml/properties/internal/symbol-table</li> 45 * <li>http://apache.org/xml/properties/internal/error-reporter</li> 46 * <li>http://apache.org/xml/properties/internal/entity-manager</li> 47 * <li>http://apache.org/xml/properties/internal/dtd-scanner</li> 48 * </ul> 49 * 50 * @xerces.internal 51 * 52 * @author Glenn Marcy, IBM 53 * @author Andy Clark, IBM 54 * @author Arnaud Le Hors, IBM 55 * @author Eric Ye, IBM 56 * 57 * @version $Id: XML11DocumentScannerImpl.java,v 1.5 2010/08/04 20:59:09 joehw Exp $ 58 */ 59 public class XML11DocumentScannerImpl 60 extends XMLDocumentScannerImpl { 61 62 63 /** String buffer. */ 64 private final XMLStringBuffer fStringBuffer = new XMLStringBuffer(); 65 private final XMLStringBuffer fStringBuffer2 = new XMLStringBuffer(); 66 private final XMLStringBuffer fStringBuffer3 = new XMLStringBuffer(); 67 68 // 69 // Constructors 70 // 71 72 /** Default constructor. */ 73 public XML11DocumentScannerImpl() {super();} // <init>() 74 75 // 76 // overridden methods 77 // 78 79 // XMLDocumentFragmentImpl methods 80 81 /** 82 * Scans element content. 83 * 84 * @return Returns the next character on the stream. 85 */ 86 protected int scanContent(XMLStringBuffer content) throws IOException, XNIException { 87 88 fTempString.length = 0; 89 int c = fEntityScanner.scanContent(fTempString); 90 content.append(fTempString); 91 92 if (c == '\r' || c == 0x85 || c == 0x2028) { 93 // happens when there is the character reference 94 // but scanContent doesn't do entity expansions... 95 // is this *really* necessary??? - NG 96 fEntityScanner.scanChar(null); 97 content.append((char)c); 98 c = -1; 99 } 100 /*if (fDocumentHandler != null && content.length > 0) { 101 fDocumentHandler.characters(content, null); 102 } */ 103 104 if (c == ']') { 105 content.append((char)fEntityScanner.scanChar(null)); 106 // remember where we are in case we get an endEntity before we 107 // could flush the buffer out - this happens when we're parsing an 108 // entity which ends with a ] 109 fInScanContent = true; 110 // 111 // We work on a single character basis to handle cases such as: 112 // ']]]>' which we might otherwise miss. 113 // 114 if (fEntityScanner.skipChar(']', null)) { 115 content.append(']'); 116 while (fEntityScanner.skipChar(']', null)) { 117 content.append(']'); 118 } 119 if (fEntityScanner.skipChar('>', null)) { 120 reportFatalError("CDEndInContent", null); 121 } 122 } 123 /*if (fDocumentHandler != null && fStringBuffer.length != 0) { 124 fDocumentHandler.characters(fStringBuffer, null); 125 }*/ 126 fInScanContent = false; 127 c = -1; 128 } 129 return c; 130 131 } // scanContent():int 132 133 /** 134 * Scans an attribute value and normalizes whitespace converting all 135 * whitespace characters to space characters. 136 * 137 * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" 138 * 139 * @param value The XMLString to fill in with the value. 140 * @param nonNormalizedValue The XMLString to fill in with the 141 * non-normalized value. 142 * @param atName The name of the attribute being parsed (for error msgs). 143 * @param checkEntities true if undeclared entities should be reported as VC violation, 144 * false if undeclared entities should be reported as WFC violation. 145 * @param eleName The name of element to which this attribute belongs. 146 * @param isNSURI The flag indicating whether the content is a namespace URI 147 * 148 * @return true if the non-normalized and normalized value are the same 149 * 150 * <strong>Note:</strong> This method uses fStringBuffer2, anything in it 151 * at the time of calling is lost. 152 **/ 153 protected boolean scanAttributeValue(XMLString value, 154 XMLString nonNormalizedValue, 155 String atName, 156 boolean checkEntities,String eleName, boolean isNSURI) 157 throws IOException, XNIException 158 { 159 // quote 160 int quote = fEntityScanner.peekChar(); 161 if (quote != '\'' && quote != '"') { 162 reportFatalError("OpenQuoteExpected", new Object[]{eleName,atName}); 163 } 164 165 fEntityScanner.scanChar(NameType.ATTRIBUTE); 166 int entityDepth = fEntityDepth; 167 168 int c = fEntityScanner.scanLiteral(quote, value, isNSURI); 169 if (DEBUG_ATTR_NORMALIZATION) { 170 System.out.println("** scanLiteral -> \"" 171 + value.toString() + "\""); 172 } 173 174 int fromIndex = 0; 175 if (c == quote && (fromIndex = isUnchangedByNormalization(value)) == -1) { 176 /** Both the non-normalized and normalized attribute values are equal. **/ 177 nonNormalizedValue.setValues(value); 178 int cquote = fEntityScanner.scanChar(NameType.ATTRIBUTE); 179 if (cquote != quote) { 180 reportFatalError("CloseQuoteExpected", new Object[]{eleName,atName}); 181 } 182 return true; 183 } 184 fStringBuffer2.clear(); 185 fStringBuffer2.append(value); 186 normalizeWhitespace(value, fromIndex); 187 if (DEBUG_ATTR_NORMALIZATION) { 188 System.out.println("** normalizeWhitespace -> \"" 189 + value.toString() + "\""); 190 } 191 if (c != quote) { 192 fScanningAttribute = true; 193 fStringBuffer.clear(); 194 do { 195 fStringBuffer.append(value); 196 if (DEBUG_ATTR_NORMALIZATION) { 197 System.out.println("** value2: \"" 198 + fStringBuffer.toString() + "\""); 199 } 200 if (c == '&') { 201 fEntityScanner.skipChar('&', NameType.REFERENCE); 202 if (entityDepth == fEntityDepth) { 203 fStringBuffer2.append('&'); 204 } 205 if (fEntityScanner.skipChar('#', NameType.REFERENCE)) { 206 if (entityDepth == fEntityDepth) { 207 fStringBuffer2.append('#'); 208 } 209 int ch = scanCharReferenceValue(fStringBuffer, fStringBuffer2); 210 if (ch != -1) { 211 if (DEBUG_ATTR_NORMALIZATION) { 212 System.out.println("** value3: \"" 213 + fStringBuffer.toString() 214 + "\""); 215 } 216 } 217 } 218 else { 219 String entityName = fEntityScanner.scanName(NameType.REFERENCE); 220 if (entityName == null) { 221 reportFatalError("NameRequiredInReference", null); 222 } 223 else if (entityDepth == fEntityDepth) { 224 fStringBuffer2.append(entityName); 225 } 226 if (!fEntityScanner.skipChar(';', NameType.REFERENCE)) { 227 reportFatalError("SemicolonRequiredInReference", 228 new Object []{entityName}); 229 } 230 else if (entityDepth == fEntityDepth) { 231 fStringBuffer2.append(';'); 232 } 233 if (resolveCharacter(entityName, fStringBuffer)) { 234 checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1); 235 } 236 else { 237 if (fEntityManager.isExternalEntity(entityName)) { 238 reportFatalError("ReferenceToExternalEntity", 239 new Object[] { entityName }); 240 } 241 else { 242 if (!fEntityManager.isDeclaredEntity(entityName)) { 243 //WFC & VC: Entity Declared 244 if (checkEntities) { 245 if (fValidation) { 246 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 247 "EntityNotDeclared", 248 new Object[]{entityName}, 249 XMLErrorReporter.SEVERITY_ERROR); 250 } 251 } 252 else { 253 reportFatalError("EntityNotDeclared", 254 new Object[]{entityName}); 255 } 256 } 257 fEntityManager.startEntity(true, entityName, true); 258 } 259 } 260 } 261 } 262 else if (c == '<') { 263 reportFatalError("LessthanInAttValue", 264 new Object[] { eleName, atName }); 265 fEntityScanner.scanChar(null); 266 if (entityDepth == fEntityDepth) { 267 fStringBuffer2.append((char)c); 268 } 269 } 270 else if (c == '%' || c == ']') { 271 fEntityScanner.scanChar(null); 272 fStringBuffer.append((char)c); 273 if (entityDepth == fEntityDepth) { 274 fStringBuffer2.append((char)c); 275 } 276 if (DEBUG_ATTR_NORMALIZATION) { 277 System.out.println("** valueF: \"" 278 + fStringBuffer.toString() + "\""); 279 } 280 } 281 // note that none of these characters should ever get through 282 // XML11EntityScanner. Not sure why 283 // this check was originally necessary. - NG 284 else if (c == '\n' || c == '\r' || c == 0x85 || c == 0x2028) { 285 fEntityScanner.scanChar(null); 286 fStringBuffer.append(' '); 287 if (entityDepth == fEntityDepth) { 288 fStringBuffer2.append('\n'); 289 } 290 } 291 else if (c != -1 && XMLChar.isHighSurrogate(c)) { 292 fStringBuffer3.clear(); 293 if (scanSurrogates(fStringBuffer3)) { 294 fStringBuffer.append(fStringBuffer3); 295 if (entityDepth == fEntityDepth) { 296 fStringBuffer2.append(fStringBuffer3); 297 } 298 if (DEBUG_ATTR_NORMALIZATION) { 299 System.out.println("** valueI: \"" 300 + fStringBuffer.toString() 301 + "\""); 302 } 303 } 304 } 305 else if (c != -1 && isInvalidLiteral(c)) { 306 reportFatalError("InvalidCharInAttValue", 307 new Object[] {eleName, atName, Integer.toString(c, 16)}); 308 fEntityScanner.scanChar(null); 309 if (entityDepth == fEntityDepth) { 310 fStringBuffer2.append((char)c); 311 } 312 } 313 c = fEntityScanner.scanLiteral(quote, value, isNSURI); 314 if (entityDepth == fEntityDepth) { 315 fStringBuffer2.append(value); 316 } 317 normalizeWhitespace(value); 318 } while (c != quote || entityDepth != fEntityDepth); 319 fStringBuffer.append(value); 320 if (DEBUG_ATTR_NORMALIZATION) { 321 System.out.println("** valueN: \"" 322 + fStringBuffer.toString() + "\""); 323 } 324 value.setValues(fStringBuffer); 325 fScanningAttribute = false; 326 } 327 nonNormalizedValue.setValues(fStringBuffer2); 328 329 // quote 330 int cquote = fEntityScanner.scanChar(null); 331 if (cquote != quote) { 332 reportFatalError("CloseQuoteExpected", new Object[]{eleName,atName}); 333 } 334 return nonNormalizedValue.equals(value.ch, value.offset, value.length); 335 } // scanAttributeValue() 336 337 // 338 // XMLScanner methods 339 // 340 // NOTE: this is a carbon copy of the code in XML11DTDScannerImpl; 341 // we need to override these methods in both places. 342 // this needs to be refactored!!! - NG 343 /** 344 * Scans public ID literal. 345 * 346 * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" 347 * [13] PubidChar::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] 348 * 349 * The returned string is normalized according to the following rule, 350 * from http://www.w3.org/TR/REC-xml#dt-pubid: 351 * 352 * Before a match is attempted, all strings of white space in the public 353 * identifier must be normalized to single space characters (#x20), and 354 * leading and trailing white space must be removed. 355 * 356 * @param literal The string to fill in with the public ID literal. 357 * @return True on success. 358 * 359 * <strong>Note:</strong> This method uses fStringBuffer, anything in it at 360 * the time of calling is lost. 361 */ 362 protected boolean scanPubidLiteral(XMLString literal) 363 throws IOException, XNIException 364 { 365 int quote = fEntityScanner.scanChar(null); 366 if (quote != '\'' && quote != '"') { 367 reportFatalError("QuoteRequiredInPublicID", null); 368 return false; 369 } 370 371 fStringBuffer.clear(); 372 // skip leading whitespace 373 boolean skipSpace = true; 374 boolean dataok = true; 375 while (true) { 376 int c = fEntityScanner.scanChar(null); 377 // REVISIT: none of these except \n and 0x20 should make it past the entity scanner 378 if (c == ' ' || c == '\n' || c == '\r' || c == 0x85 || c == 0x2028) { 379 if (!skipSpace) { 380 // take the first whitespace as a space and skip the others 381 fStringBuffer.append(' '); 382 skipSpace = true; 383 } 384 } 385 else if (c == quote) { 386 if (skipSpace) { 387 // if we finished on a space let's trim it 388 fStringBuffer.length--; 389 } 390 literal.setValues(fStringBuffer); 391 break; 392 } 393 else if (XMLChar.isPubid(c)) { 394 fStringBuffer.append((char)c); 395 skipSpace = false; 396 } 397 else if (c == -1) { 398 reportFatalError("PublicIDUnterminated", null); 399 return false; 400 } 401 else { 402 dataok = false; 403 reportFatalError("InvalidCharInPublicID", 404 new Object[]{Integer.toHexString(c)}); 405 } 406 } 407 return dataok; 408 } 409 410 /** 411 * Normalize whitespace in an XMLString converting all whitespace 412 * characters to space characters. 413 */ 414 protected void normalizeWhitespace(XMLString value) { 415 int end = value.offset + value.length; 416 for (int i = value.offset; i < end; ++i) { 417 int c = value.ch[i]; 418 if (XMLChar.isSpace(c)) { 419 value.ch[i] = ' '; 420 } 421 } 422 } 423 424 /** 425 * Normalize whitespace in an XMLString converting all whitespace 426 * characters to space characters. 427 */ 428 protected void normalizeWhitespace(XMLString value, int fromIndex) { 429 int end = value.offset + value.length; 430 for (int i = value.offset + fromIndex; i < end; ++i) { 431 int c = value.ch[i]; 432 if (XMLChar.isSpace(c)) { 433 value.ch[i] = ' '; 434 } 435 } 436 } 437 438 /** 439 * Checks whether this string would be unchanged by normalization. 440 * 441 * @return -1 if the value would be unchanged by normalization, 442 * otherwise the index of the first whitespace character which 443 * would be transformed. 444 */ 445 protected int isUnchangedByNormalization(XMLString value) { 446 int end = value.offset + value.length; 447 for (int i = value.offset; i < end; ++i) { 448 int c = value.ch[i]; 449 if (XMLChar.isSpace(c)) { 450 return i - value.offset; 451 } 452 } 453 return -1; 454 } 455 456 // returns true if the given character is not 457 // valid with respect to the version of 458 // XML understood by this scanner. 459 protected boolean isInvalid(int value) { 460 return (XML11Char.isXML11Invalid(value)); 461 } // isInvalid(int): boolean 462 463 // returns true if the given character is not 464 // valid or may not be used outside a character reference 465 // with respect to the version of XML understood by this scanner. 466 protected boolean isInvalidLiteral(int value) { 467 return (!XML11Char.isXML11ValidLiteral(value)); 468 } // isInvalidLiteral(int): boolean 469 470 // returns true if the given character is 471 // a valid nameChar with respect to the version of 472 // XML understood by this scanner. 473 protected boolean isValidNameChar(int value) { 474 return (XML11Char.isXML11Name(value)); 475 } // isValidNameChar(int): boolean 476 477 // returns true if the given character is 478 // a valid nameStartChar with respect to the version of 479 // XML understood by this scanner. 480 protected boolean isValidNameStartChar(int value) { 481 return (XML11Char.isXML11NameStart(value)); 482 } // isValidNameStartChar(int): boolean 483 484 // returns true if the given character is 485 // a valid NCName character with respect to the version of 486 // XML understood by this scanner. 487 protected boolean isValidNCName(int value) { 488 return (XML11Char.isXML11NCName(value)); 489 } // isValidNCName(int): boolean 490 491 // returns true if the given character is 492 // a valid high surrogate for a nameStartChar 493 // with respect to the version of XML understood 494 // by this scanner. 495 protected boolean isValidNameStartHighSurrogate(int value) { 496 return XML11Char.isXML11NameHighSurrogate(value); 497 } // isValidNameStartHighSurrogate(int): boolean 498 499 protected boolean versionSupported(String version) { 500 return (version.equals("1.1") || version.equals("1.0")); 501 } // versionSupported(String): boolean 502 503 // returns the error message key for unsupported 504 // versions of XML with respect to the version of 505 // XML understood by this scanner. 506 protected String getVersionNotSupportedKey () { 507 return "VersionNotSupported11"; 508 } // getVersionNotSupportedKey: String 509 510 } // class XML11DocumentScannerImpl