1 /* 2 * Copyright (c) 2006, 2017, Oracle and/or its affiliates. All rights reserved. 3 */ 4 /* 5 * Licensed to the Apache Software Foundation (ASF) under one or more 6 * contributor license agreements. See the NOTICE file distributed with 7 * this work for additional information regarding copyright ownership. 8 * The ASF licenses this file to You under the Apache License, Version 2.0 9 * (the "License"); you may not use this file except in compliance with 10 * the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xerces.internal.impl.xs.models; 22 23 import com.sun.org.apache.xerces.internal.impl.dtd.models.CMNode; 24 import com.sun.org.apache.xerces.internal.impl.dtd.models.CMStateSet; 25 import com.sun.org.apache.xerces.internal.impl.xs.SchemaSymbols; 26 import com.sun.org.apache.xerces.internal.impl.xs.SubstitutionGroupHandler; 27 import com.sun.org.apache.xerces.internal.impl.xs.XMLSchemaException; 28 import com.sun.org.apache.xerces.internal.impl.xs.XSConstraints; 29 import com.sun.org.apache.xerces.internal.impl.xs.XSElementDecl; 30 import com.sun.org.apache.xerces.internal.impl.xs.XSModelGroupImpl; 31 import com.sun.org.apache.xerces.internal.impl.xs.XSParticleDecl; 32 import com.sun.org.apache.xerces.internal.impl.xs.XSWildcardDecl; 33 import com.sun.org.apache.xerces.internal.xni.QName; 34 import java.util.ArrayList; 35 import java.util.HashMap; 36 import java.util.List; 37 import java.util.Map; 38 39 /** 40 * DFAContentModel is the implementation of XSCMValidator that does 41 * all of the non-trivial element content validation. This class does 42 * the conversion from the regular expression to the DFA that 43 * it then uses in its validation algorithm. 44 * 45 * @xerces.internal 46 * 47 * @author Neil Graham, IBM 48 * @LastModified: Oct 2017 49 */ 50 public class XSDFACM 51 implements XSCMValidator { 52 53 // 54 // Constants 55 // 56 private static final boolean DEBUG = false; 57 58 // special strings 59 60 // debugging 61 62 /** Set to true to debug content model validation. */ 63 private static final boolean DEBUG_VALIDATE_CONTENT = false; 64 65 // 66 // Data 67 // 68 69 /** 70 * This is the map of unique input symbol elements to indices into 71 * each state's per-input symbol transition table entry. This is part 72 * of the built DFA information that must be kept around to do the 73 * actual validation. Note tat since either XSElementDecl or XSParticleDecl object 74 * can live here, we've got to use an Object. 75 */ 76 private Object fElemMap[] = null; 77 78 /** 79 * This is a map of whether the element map contains information 80 * related to ANY models. 81 */ 82 private int fElemMapType[] = null; 83 84 /** 85 * id of the unique input symbol 86 */ 87 private int fElemMapId[] = null; 88 89 /** The element map size. */ 90 private int fElemMapSize = 0; 91 92 /** 93 * This is an array of booleans, one per state (there are 94 * fTransTableSize states in the DFA) that indicates whether that 95 * state is a final state. 96 */ 97 private boolean fFinalStateFlags[] = null; 98 99 /** 100 * The list of follow positions for each NFA position (i.e. for each 101 * non-epsilon leaf node.) This is only used during the building of 102 * the DFA, and is let go afterwards. 103 */ 104 private CMStateSet fFollowList[] = null; 105 106 /** 107 * This is the head node of our intermediate representation. It is 108 * only non-null during the building of the DFA (just so that it 109 * does not have to be passed all around.) Once the DFA is built, 110 * this is no longer required so its nulled out. 111 */ 112 private CMNode fHeadNode = null; 113 114 /** 115 * The count of leaf nodes. This is an important number that set some 116 * limits on the sizes of data structures in the DFA process. 117 */ 118 private int fLeafCount = 0; 119 120 /** 121 * An array of non-epsilon leaf nodes, which is used during the DFA 122 * build operation, then dropped. 123 */ 124 private XSCMLeaf fLeafList[] = null; 125 126 /** Array mapping ANY types to the leaf list. */ 127 private int fLeafListType[] = null; 128 129 /** 130 * This is the transition table that is the main by product of all 131 * of the effort here. It is an array of arrays of ints. The first 132 * dimension is the number of states we end up with in the DFA. The 133 * second dimensions is the number of unique elements in the content 134 * model (fElemMapSize). Each entry in the second dimension indicates 135 * the new state given that input for the first dimension's start 136 * state. 137 * <p> 138 * The fElemMap array handles mapping from element indexes to 139 * positions in the second dimension of the transition table. 140 */ 141 private int fTransTable[][] = null; 142 143 /** 144 * Array containing occurence information for looping states 145 * which use counters to check minOccurs/maxOccurs. 146 */ 147 private Occurence [] fCountingStates = null; 148 static final class Occurence { 149 final int minOccurs; 150 final int maxOccurs; 151 final int elemIndex; 152 public Occurence (XSCMRepeatingLeaf leaf, int elemIndex) { 153 minOccurs = leaf.getMinOccurs(); 154 maxOccurs = leaf.getMaxOccurs(); 155 this.elemIndex = elemIndex; 156 } 157 public String toString() { 158 return "minOccurs=" + minOccurs 159 + ";maxOccurs=" + 160 ((maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED) 161 ? Integer.toString(maxOccurs) : "unbounded"); 162 } 163 } 164 165 /** 166 * The number of valid entries in the transition table, and in the other 167 * related tables such as fFinalStateFlags. 168 */ 169 private int fTransTableSize = 0; 170 171 private boolean fIsCompactedForUPA; 172 173 /** 174 * Array of counters for all the for elements (or wildcards) 175 * of the form a{n,m} where n > 1 and m <= unbounded. Used 176 * to count the a's to later check against n and m. Counter 177 * set to -1 if element (or wildcard) not optimized by 178 * constant space algorithm. 179 */ 180 private int fElemMapCounter[]; 181 182 /** 183 * Array of lower bounds for all the for elements (or wildcards) 184 * of the form a{n,m} where n > 1 and m <= unbounded. This array 185 * stores the n's for those elements (or wildcards) for which 186 * the constant space algorithm applies (or -1 otherwise). 187 */ 188 private int fElemMapCounterLowerBound[]; 189 190 /** 191 * Array of upper bounds for all the for elements (or wildcards) 192 * of the form a{n,m} where n > 1 and m <= unbounded. This array 193 * stores the n's for those elements (or wildcards) for which 194 * the constant space algorithm applies, or -1 if algorithm does 195 * not apply or m = unbounded. 196 */ 197 private int fElemMapCounterUpperBound[]; // -1 if no upper bound 198 199 // temp variables 200 201 // 202 // Constructors 203 // 204 205 /** 206 * Constructs a DFA content model. 207 * 208 * @param syntaxTree The syntax tree of the content model. 209 * @param leafCount The number of leaves. 210 * 211 * @exception RuntimeException Thrown if DFA can't be built. 212 */ 213 214 public XSDFACM(CMNode syntaxTree, int leafCount) { 215 216 // Store away our index and pools in members 217 fLeafCount = leafCount; 218 fIsCompactedForUPA = syntaxTree.isCompactedForUPA(); 219 220 // 221 // Create some string pool indexes that represent the names of some 222 // magical nodes in the syntax tree. 223 // (already done in static initialization... 224 // 225 226 // 227 // Ok, so lets grind through the building of the DFA. This method 228 // handles the high level logic of the algorithm, but it uses a 229 // number of helper classes to do its thing. 230 // 231 // In order to avoid having hundreds of references to the error and 232 // string handlers around, this guy and all of his helper classes 233 // just throw a simple exception and we then pass it along. 234 // 235 236 if(DEBUG_VALIDATE_CONTENT) { 237 XSDFACM.time -= System.currentTimeMillis(); 238 } 239 240 buildDFA(syntaxTree); 241 242 if(DEBUG_VALIDATE_CONTENT) { 243 XSDFACM.time += System.currentTimeMillis(); 244 System.out.println("DFA build: " + XSDFACM.time + "ms"); 245 } 246 } 247 248 private static long time = 0; 249 250 // 251 // XSCMValidator methods 252 // 253 254 /** 255 * check whether the given state is one of the final states 256 * 257 * @param state the state to check 258 * 259 * @return whether it's a final state 260 */ 261 public boolean isFinalState (int state) { 262 return (state < 0)? false : 263 fFinalStateFlags[state]; 264 } 265 266 /** 267 * one transition only 268 * 269 * @param curElem The current element's QName 270 * @param state stack to store the previous state 271 * @param subGroupHandler the substitution group handler 272 * 273 * @return null if transition is invalid; otherwise the Object corresponding to the 274 * XSElementDecl or XSWildcardDecl identified. Also, the 275 * state array will be modified to include the new state; this so that the validator can 276 * store it away. 277 * 278 * @exception RuntimeException thrown on error 279 */ 280 public Object oneTransition(QName curElem, int[] state, SubstitutionGroupHandler subGroupHandler) { 281 int curState = state[0]; 282 283 if(curState == XSCMValidator.FIRST_ERROR || curState == XSCMValidator.SUBSEQUENT_ERROR) { 284 // there was an error last time; so just go find correct Object in fElemmMap. 285 // ... after resetting state[0]. 286 if(curState == XSCMValidator.FIRST_ERROR) 287 state[0] = XSCMValidator.SUBSEQUENT_ERROR; 288 289 return findMatchingDecl(curElem, subGroupHandler); 290 } 291 292 int nextState = 0; 293 int elemIndex = 0; 294 Object matchingDecl = null; 295 296 for (; elemIndex < fElemMapSize; elemIndex++) { 297 nextState = fTransTable[curState][elemIndex]; 298 if (nextState == -1) 299 continue; 300 int type = fElemMapType[elemIndex] ; 301 if (type == XSParticleDecl.PARTICLE_ELEMENT) { 302 matchingDecl = subGroupHandler.getMatchingElemDecl(curElem, (XSElementDecl)fElemMap[elemIndex]); 303 if (matchingDecl != null) { 304 // Increment counter if constant space algorithm applies 305 if (fElemMapCounter[elemIndex] >= 0) { 306 fElemMapCounter[elemIndex]++; 307 } 308 break; 309 } 310 } 311 else if (type == XSParticleDecl.PARTICLE_WILDCARD) { 312 if (((XSWildcardDecl)fElemMap[elemIndex]).allowNamespace(curElem.uri)) { 313 matchingDecl = fElemMap[elemIndex]; 314 // Increment counter if constant space algorithm applies 315 if (fElemMapCounter[elemIndex] >= 0) { 316 fElemMapCounter[elemIndex]++; 317 } 318 break; 319 } 320 } 321 } 322 323 // if we still can't find a match, set the state to first_error 324 // and return null 325 if (elemIndex == fElemMapSize) { 326 state[1] = state[0]; 327 state[0] = XSCMValidator.FIRST_ERROR; 328 return findMatchingDecl(curElem, subGroupHandler); 329 } 330 331 if (fCountingStates != null) { 332 Occurence o = fCountingStates[curState]; 333 if (o != null) { 334 if (curState == nextState) { 335 if (++state[2] > o.maxOccurs && 336 o.maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED) { 337 // It's likely that we looped too many times on the current state 338 // however it's possible that we actually matched another particle 339 // which allows the same name. 340 // 341 // Consider: 342 // 343 // <xs:sequence> 344 // <xs:element name="foo" type="xs:string" minOccurs="3" maxOccurs="3"/> 345 // <xs:element name="foo" type="xs:string" fixed="bar"/> 346 // </xs:sequence> 347 // 348 // and 349 // 350 // <xs:sequence> 351 // <xs:element name="foo" type="xs:string" minOccurs="3" maxOccurs="3"/> 352 // <xs:any namespace="##any" processContents="skip"/> 353 // </xs:sequence> 354 // 355 // In the DFA there will be two transitions from the current state which 356 // allow "foo". Note that this is not a UPA violation. The ambiguity of which 357 // transition to take is resolved by the current value of the counter. Since 358 // we've already seen enough instances of the first "foo" perhaps there is 359 // another element declaration or wildcard deeper in the element map which 360 // matches. 361 return findMatchingDecl(curElem, state, subGroupHandler, elemIndex); 362 } 363 } 364 else if (state[2] < o.minOccurs) { 365 // not enough loops on the current state. 366 state[1] = state[0]; 367 state[0] = XSCMValidator.FIRST_ERROR; 368 return findMatchingDecl(curElem, subGroupHandler); 369 } 370 else { 371 // Exiting a counting state. If we're entering a new 372 // counting state, reset the counter. 373 o = fCountingStates[nextState]; 374 if (o != null) { 375 state[2] = (elemIndex == o.elemIndex) ? 1 : 0; 376 } 377 } 378 } 379 else { 380 o = fCountingStates[nextState]; 381 if (o != null) { 382 // Entering a new counting state. Reset the counter. 383 // If we've already seen one instance of the looping 384 // particle set the counter to 1, otherwise set it 385 // to 0. 386 state[2] = (elemIndex == o.elemIndex) ? 1 : 0; 387 } 388 } 389 } 390 391 state[0] = nextState; 392 return matchingDecl; 393 } // oneTransition(QName, int[], SubstitutionGroupHandler): Object 394 395 Object findMatchingDecl(QName curElem, SubstitutionGroupHandler subGroupHandler) { 396 Object matchingDecl = null; 397 398 for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { 399 int type = fElemMapType[elemIndex] ; 400 if (type == XSParticleDecl.PARTICLE_ELEMENT) { 401 matchingDecl = subGroupHandler.getMatchingElemDecl(curElem, (XSElementDecl)fElemMap[elemIndex]); 402 if (matchingDecl != null) { 403 return matchingDecl; 404 } 405 } 406 else if (type == XSParticleDecl.PARTICLE_WILDCARD) { 407 if(((XSWildcardDecl)fElemMap[elemIndex]).allowNamespace(curElem.uri)) 408 return fElemMap[elemIndex]; 409 } 410 } 411 412 return null; 413 } // findMatchingDecl(QName, SubstitutionGroupHandler): Object 414 415 Object findMatchingDecl(QName curElem, int[] state, SubstitutionGroupHandler subGroupHandler, int elemIndex) { 416 417 int curState = state[0]; 418 int nextState = 0; 419 Object matchingDecl = null; 420 421 while (++elemIndex < fElemMapSize) { 422 nextState = fTransTable[curState][elemIndex]; 423 if (nextState == -1) 424 continue; 425 int type = fElemMapType[elemIndex] ; 426 if (type == XSParticleDecl.PARTICLE_ELEMENT) { 427 matchingDecl = subGroupHandler.getMatchingElemDecl(curElem, (XSElementDecl)fElemMap[elemIndex]); 428 if (matchingDecl != null) { 429 break; 430 } 431 } 432 else if (type == XSParticleDecl.PARTICLE_WILDCARD) { 433 if (((XSWildcardDecl)fElemMap[elemIndex]).allowNamespace(curElem.uri)) { 434 matchingDecl = fElemMap[elemIndex]; 435 break; 436 } 437 } 438 } 439 440 // if we still can't find a match, set the state to FIRST_ERROR and return null 441 if (elemIndex == fElemMapSize) { 442 state[1] = state[0]; 443 state[0] = XSCMValidator.FIRST_ERROR; 444 return findMatchingDecl(curElem, subGroupHandler); 445 } 446 447 // if we found a match, set the next state and reset the 448 // counter if the next state is a counting state. 449 state[0] = nextState; 450 final Occurence o = fCountingStates[nextState]; 451 if (o != null) { 452 state[2] = (elemIndex == o.elemIndex) ? 1 : 0; 453 } 454 return matchingDecl; 455 } // findMatchingDecl(QName, int[], SubstitutionGroupHandler, int): Object 456 457 // This method returns the start states of the content model. 458 public int[] startContentModel() { 459 // Clear all constant space algorithm counters in use 460 for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { 461 if (fElemMapCounter[elemIndex] != -1) { 462 fElemMapCounter[elemIndex] = 0; 463 } 464 } 465 // [0] : the current state 466 // [1] : if [0] is an error state then the 467 // last valid state before the error 468 // [2] : occurence counter for counting states 469 return new int [3]; 470 } // startContentModel():int[] 471 472 // this method returns whether the last state was a valid final state 473 public boolean endContentModel(int[] state) { 474 final int curState = state[0]; 475 if (fFinalStateFlags[curState]) { 476 if (fCountingStates != null) { 477 Occurence o = fCountingStates[curState]; 478 if (o != null && state[2] < o.minOccurs) { 479 // not enough loops on the current state to be considered final. 480 return false; 481 } 482 } 483 return true; 484 } 485 return false; 486 } // endContentModel(int[]): boolean 487 488 // Killed off whatCanGoHere; we may need it for DOM canInsert(...) etc., 489 // but we can put it back later. 490 491 // 492 // Private methods 493 // 494 495 /** 496 * Builds the internal DFA transition table from the given syntax tree. 497 * 498 * @param syntaxTree The syntax tree. 499 * 500 * @exception RuntimeException Thrown if DFA cannot be built. 501 */ 502 private void buildDFA(CMNode syntaxTree) { 503 // 504 // The first step we need to take is to rewrite the content model 505 // using our CMNode objects, and in the process get rid of any 506 // repetition short cuts, converting them into '*' style repetitions 507 // or getting rid of repetitions altogether. 508 // 509 // The conversions done are: 510 // 511 // x+ -> (x|x*) 512 // x? -> (x|epsilon) 513 // 514 // This is a relatively complex scenario. What is happening is that 515 // we create a top level binary node of which the special EOC value 516 // is set as the right side node. The the left side is set to the 517 // rewritten syntax tree. The source is the original content model 518 // info from the decl pool. The rewrite is done by buildSyntaxTree() 519 // which recurses the decl pool's content of the element and builds 520 // a new tree in the process. 521 // 522 // Note that, during this operation, we set each non-epsilon leaf 523 // node's DFA state position and count the number of such leafs, which 524 // is left in the fLeafCount member. 525 // 526 // The nodeTmp object is passed in just as a temp node to use during 527 // the recursion. Otherwise, we'd have to create a new node on every 528 // level of recursion, which would be piggy in Java (as is everything 529 // for that matter.) 530 // 531 532 /* MODIFIED (Jan, 2001) 533 * 534 * Use following rules. 535 * nullable(x+) := nullable(x), first(x+) := first(x), last(x+) := last(x) 536 * nullable(x?) := true, first(x?) := first(x), last(x?) := last(x) 537 * 538 * The same computation of follow as x* is applied to x+ 539 * 540 * The modification drastically reduces computation time of 541 * "(a, (b, a+, (c, (b, a+)+, a+, (d, (c, (b, a+)+, a+)+, (b, a+)+, a+)+)+)+)+" 542 */ 543 544 // 545 // And handle specially the EOC node, which also must be numbered 546 // and counted as a non-epsilon leaf node. It could not be handled 547 // in the above tree build because it was created before all that 548 // started. We save the EOC position since its used during the DFA 549 // building loop. 550 // 551 int EOCPos = fLeafCount; 552 XSCMLeaf nodeEOC = new XSCMLeaf(XSParticleDecl.PARTICLE_ELEMENT, null, -1, fLeafCount++); 553 fHeadNode = new XSCMBinOp( 554 XSModelGroupImpl.MODELGROUP_SEQUENCE, 555 syntaxTree, 556 nodeEOC 557 ); 558 559 // 560 // Ok, so now we have to iterate the new tree and do a little more 561 // work now that we know the leaf count. One thing we need to do is 562 // to calculate the first and last position sets of each node. This 563 // is cached away in each of the nodes. 564 // 565 // Along the way we also set the leaf count in each node as the 566 // maximum state count. They must know this in order to create their 567 // first/last pos sets. 568 // 569 // We also need to build an array of references to the non-epsilon 570 // leaf nodes. Since we iterate it in the same way as before, this 571 // will put them in the array according to their position values. 572 // 573 fLeafList = new XSCMLeaf[fLeafCount]; 574 fLeafListType = new int[fLeafCount]; 575 postTreeBuildInit(fHeadNode); 576 577 // 578 // And, moving onward... We now need to build the follow position 579 // sets for all the nodes. So we allocate an array of state sets, 580 // one for each leaf node (i.e. each DFA position.) 581 // 582 fFollowList = new CMStateSet[fLeafCount]; 583 for (int index = 0; index < fLeafCount; index++) 584 fFollowList[index] = new CMStateSet(fLeafCount); 585 calcFollowList(fHeadNode); 586 // 587 // And finally the big push... Now we build the DFA using all the 588 // states and the tree we've built up. First we set up the various 589 // data structures we are going to use while we do this. 590 // 591 // First of all we need an array of unique element names in our 592 // content model. For each transition table entry, we need a set of 593 // contiguous indices to represent the transitions for a particular 594 // input element. So we need to a zero based range of indexes that 595 // map to element types. This element map provides that mapping. 596 // 597 fElemMap = new Object[fLeafCount]; 598 fElemMapType = new int[fLeafCount]; 599 fElemMapId = new int[fLeafCount]; 600 601 fElemMapCounter = new int[fLeafCount]; 602 fElemMapCounterLowerBound = new int[fLeafCount]; 603 fElemMapCounterUpperBound = new int[fLeafCount]; 604 605 fElemMapSize = 0; 606 Occurence [] elemOccurenceMap = null; 607 608 for (int outIndex = 0; outIndex < fLeafCount; outIndex++) { 609 // optimization from Henry Zongaro: 610 //fElemMap[outIndex] = new Object (); 611 fElemMap[outIndex] = null; 612 613 int inIndex = 0; 614 final int id = fLeafList[outIndex].getParticleId(); 615 for (; inIndex < fElemMapSize; inIndex++) { 616 if (id == fElemMapId[inIndex]) 617 break; 618 } 619 620 // If it was not in the list, then add it, if not the EOC node 621 if (inIndex == fElemMapSize) { 622 XSCMLeaf leaf = fLeafList[outIndex]; 623 fElemMap[fElemMapSize] = leaf.getLeaf(); 624 if (leaf instanceof XSCMRepeatingLeaf) { 625 if (elemOccurenceMap == null) { 626 elemOccurenceMap = new Occurence[fLeafCount]; 627 } 628 elemOccurenceMap[fElemMapSize] = new Occurence((XSCMRepeatingLeaf) leaf, fElemMapSize); 629 } 630 631 fElemMapType[fElemMapSize] = fLeafListType[outIndex]; 632 fElemMapId[fElemMapSize] = id; 633 634 // Init counters and bounds for a{n,m} algorithm 635 int[] bounds = (int[]) leaf.getUserData(); 636 if (bounds != null) { 637 fElemMapCounter[fElemMapSize] = 0; 638 fElemMapCounterLowerBound[fElemMapSize] = bounds[0]; 639 fElemMapCounterUpperBound[fElemMapSize] = bounds[1]; 640 } else { 641 fElemMapCounter[fElemMapSize] = -1; 642 fElemMapCounterLowerBound[fElemMapSize] = -1; 643 fElemMapCounterUpperBound[fElemMapSize] = -1; 644 } 645 646 fElemMapSize++; 647 } 648 } 649 650 // the last entry in the element map must be the EOC element. 651 // remove it from the map. 652 if (DEBUG) { 653 if (fElemMapId[fElemMapSize-1] != -1) 654 System.err.println("interal error in DFA: last element is not EOC."); 655 } 656 fElemMapSize--; 657 658 /*** 659 * Optimization(Jan, 2001); We sort fLeafList according to 660 * elemIndex which is *uniquely* associated to each leaf. 661 * We are *assuming* that each element appears in at least one leaf. 662 **/ 663 664 int[] fLeafSorter = new int[fLeafCount + fElemMapSize]; 665 int fSortCount = 0; 666 667 for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { 668 final int id = fElemMapId[elemIndex]; 669 for (int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) { 670 if (id == fLeafList[leafIndex].getParticleId()) 671 fLeafSorter[fSortCount++] = leafIndex; 672 } 673 fLeafSorter[fSortCount++] = -1; 674 } 675 676 /* Optimization(Jan, 2001) */ 677 678 // 679 // Next lets create some arrays, some that hold transient 680 // information during the DFA build and some that are permament. 681 // These are kind of sticky since we cannot know how big they will 682 // get, but we don't want to use any Java collections because of 683 // performance. 684 // 685 // Basically they will probably be about fLeafCount*2 on average, 686 // but can be as large as 2^(fLeafCount*2), worst case. So we start 687 // with fLeafCount*4 as a middle ground. This will be very unlikely 688 // to ever have to expand, though it if does, the overhead will be 689 // somewhat ugly. 690 // 691 int curArraySize = fLeafCount * 4; 692 CMStateSet[] statesToDo = new CMStateSet[curArraySize]; 693 fFinalStateFlags = new boolean[curArraySize]; 694 fTransTable = new int[curArraySize][]; 695 696 // 697 // Ok we start with the initial set as the first pos set of the 698 // head node (which is the seq node that holds the content model 699 // and the EOC node.) 700 // 701 CMStateSet setT = fHeadNode.firstPos(); 702 703 // 704 // Init our two state flags. Basically the unmarked state counter 705 // is always chasing the current state counter. When it catches up, 706 // that means we made a pass through that did not add any new states 707 // to the lists, at which time we are done. We could have used a 708 // expanding array of flags which we used to mark off states as we 709 // complete them, but this is easier though less readable maybe. 710 // 711 int unmarkedState = 0; 712 int curState = 0; 713 714 // 715 // Init the first transition table entry, and put the initial state 716 // into the states to do list, then bump the current state. 717 // 718 fTransTable[curState] = makeDefStateList(); 719 statesToDo[curState] = setT; 720 curState++; 721 722 /* Optimization(Jan, 2001); This is faster for 723 * a large content model such as, "(t001+|t002+|.... |t500+)". 724 */ 725 726 Map<CMStateSet, Integer> stateTable = new HashMap<>(); 727 728 /* Optimization(Jan, 2001) */ 729 730 // 731 // Ok, almost done with the algorithm... We now enter the 732 // loop where we go until the states done counter catches up with 733 // the states to do counter. 734 // 735 while (unmarkedState < curState) { 736 // 737 // Get the first unmarked state out of the list of states to do. 738 // And get the associated transition table entry. 739 // 740 setT = statesToDo[unmarkedState]; 741 int[] transEntry = fTransTable[unmarkedState]; 742 743 // Mark this one final if it contains the EOC state 744 fFinalStateFlags[unmarkedState] = setT.getBit(EOCPos); 745 746 // Bump up the unmarked state count, marking this state done 747 unmarkedState++; 748 749 // Loop through each possible input symbol in the element map 750 CMStateSet newSet = null; 751 /* Optimization(Jan, 2001) */ 752 int sorterIndex = 0; 753 /* Optimization(Jan, 2001) */ 754 for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { 755 // 756 // Build up a set of states which is the union of all of 757 // the follow sets of DFA positions that are in the current 758 // state. If we gave away the new set last time through then 759 // create a new one. Otherwise, zero out the existing one. 760 // 761 if (newSet == null) 762 newSet = new CMStateSet(fLeafCount); 763 else 764 newSet.zeroBits(); 765 766 /* Optimization(Jan, 2001) */ 767 int leafIndex = fLeafSorter[sorterIndex++]; 768 769 while (leafIndex != -1) { 770 // If this leaf index (DFA position) is in the current set... 771 if (setT.getBit(leafIndex)) { 772 // 773 // If this leaf is the current input symbol, then we 774 // want to add its follow list to the set of states to 775 // transition to from the current state. 776 // 777 newSet.union(fFollowList[leafIndex]); 778 } 779 780 leafIndex = fLeafSorter[sorterIndex++]; 781 } 782 /* Optimization(Jan, 2001) */ 783 784 // 785 // If this new set is not empty, then see if its in the list 786 // of states to do. If not, then add it. 787 // 788 if (!newSet.isEmpty()) { 789 // 790 // Search the 'states to do' list to see if this new 791 // state set is already in there. 792 // 793 794 /* Optimization(Jan, 2001) */ 795 Integer stateObj = stateTable.get(newSet); 796 int stateIndex = (stateObj == null ? curState : stateObj); 797 /* Optimization(Jan, 2001) */ 798 799 // If we did not find it, then add it 800 if (stateIndex == curState) { 801 // 802 // Put this new state into the states to do and init 803 // a new entry at the same index in the transition 804 // table. 805 // 806 statesToDo[curState] = newSet; 807 fTransTable[curState] = makeDefStateList(); 808 809 /* Optimization(Jan, 2001) */ 810 stateTable.put(newSet, curState); 811 /* Optimization(Jan, 2001) */ 812 813 // We now have a new state to do so bump the count 814 curState++; 815 816 // 817 // Null out the new set to indicate we adopted it. 818 // This will cause the creation of a new set on the 819 // next time around the loop. 820 // 821 newSet = null; 822 } 823 824 // 825 // Now set this state in the transition table's entry 826 // for this element (using its index), with the DFA 827 // state we will move to from the current state when we 828 // see this input element. 829 // 830 transEntry[elemIndex] = stateIndex; 831 832 // Expand the arrays if we're full 833 if (curState == curArraySize) { 834 // 835 // Yikes, we overflowed the initial array size, so 836 // we've got to expand all of these arrays. So adjust 837 // up the size by 50% and allocate new arrays. 838 // 839 final int newSize = (int)(curArraySize * 1.5); 840 CMStateSet[] newToDo = new CMStateSet[newSize]; 841 boolean[] newFinalFlags = new boolean[newSize]; 842 int[][] newTransTable = new int[newSize][]; 843 844 // Copy over all of the existing content 845 System.arraycopy(statesToDo, 0, newToDo, 0, curArraySize); 846 System.arraycopy(fFinalStateFlags, 0, newFinalFlags, 0, curArraySize); 847 System.arraycopy(fTransTable, 0, newTransTable, 0, curArraySize); 848 849 // Store the new array size 850 curArraySize = newSize; 851 statesToDo = newToDo; 852 fFinalStateFlags = newFinalFlags; 853 fTransTable = newTransTable; 854 } 855 } 856 } 857 } 858 859 // 860 // Fill in the occurence information for each looping state 861 // if we're using counters. 862 // 863 if (elemOccurenceMap != null) { 864 fCountingStates = new Occurence[curState]; 865 for (int i = 0; i < curState; ++i) { 866 int [] transitions = fTransTable[i]; 867 for (int j = 0; j < transitions.length; ++j) { 868 if (i == transitions[j]) { 869 fCountingStates[i] = elemOccurenceMap[j]; 870 break; 871 } 872 } 873 } 874 } 875 876 // 877 // And now we can say bye bye to the temp representation since we've 878 // built the DFA. 879 // 880 if (DEBUG_VALIDATE_CONTENT) 881 dumpTree(fHeadNode, 0); 882 fHeadNode = null; 883 fLeafList = null; 884 fFollowList = null; 885 fLeafListType = null; 886 fElemMapId = null; 887 } 888 889 /** 890 * Calculates the follow list of the current node. 891 * 892 * @param nodeCur The curent node. 893 * 894 * @exception RuntimeException Thrown if follow list cannot be calculated. 895 */ 896 private void calcFollowList(CMNode nodeCur) { 897 // Recurse as required 898 if (nodeCur.type() == XSModelGroupImpl.MODELGROUP_CHOICE) { 899 // Recurse only 900 calcFollowList(((XSCMBinOp)nodeCur).getLeft()); 901 calcFollowList(((XSCMBinOp)nodeCur).getRight()); 902 } 903 else if (nodeCur.type() == XSModelGroupImpl.MODELGROUP_SEQUENCE) { 904 // Recurse first 905 calcFollowList(((XSCMBinOp)nodeCur).getLeft()); 906 calcFollowList(((XSCMBinOp)nodeCur).getRight()); 907 908 // 909 // Now handle our level. We use our left child's last pos 910 // set and our right child's first pos set, so go ahead and 911 // get them ahead of time. 912 // 913 final CMStateSet last = ((XSCMBinOp)nodeCur).getLeft().lastPos(); 914 final CMStateSet first = ((XSCMBinOp)nodeCur).getRight().firstPos(); 915 916 // 917 // Now, for every position which is in our left child's last set 918 // add all of the states in our right child's first set to the 919 // follow set for that position. 920 // 921 for (int index = 0; index < fLeafCount; index++) { 922 if (last.getBit(index)) 923 fFollowList[index].union(first); 924 } 925 } 926 else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_MORE 927 || nodeCur.type() == XSParticleDecl.PARTICLE_ONE_OR_MORE) { 928 // Recurse first 929 calcFollowList(((XSCMUniOp)nodeCur).getChild()); 930 931 // 932 // Now handle our level. We use our own first and last position 933 // sets, so get them up front. 934 // 935 final CMStateSet first = nodeCur.firstPos(); 936 final CMStateSet last = nodeCur.lastPos(); 937 938 // 939 // For every position which is in our last position set, add all 940 // of our first position states to the follow set for that 941 // position. 942 // 943 for (int index = 0; index < fLeafCount; index++) { 944 if (last.getBit(index)) 945 fFollowList[index].union(first); 946 } 947 } 948 949 else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_ONE) { 950 // Recurse only 951 calcFollowList(((XSCMUniOp)nodeCur).getChild()); 952 } 953 954 } 955 956 /** 957 * Dumps the tree of the current node to standard output. 958 * 959 * @param nodeCur The current node. 960 * @param level The maximum levels to output. 961 * 962 * @exception RuntimeException Thrown on error. 963 */ 964 private void dumpTree(CMNode nodeCur, int level) { 965 for (int index = 0; index < level; index++) 966 System.out.print(" "); 967 968 int type = nodeCur.type(); 969 970 switch(type ) { 971 972 case XSModelGroupImpl.MODELGROUP_CHOICE: 973 case XSModelGroupImpl.MODELGROUP_SEQUENCE: { 974 if (type == XSModelGroupImpl.MODELGROUP_CHOICE) 975 System.out.print("Choice Node "); 976 else 977 System.out.print("Seq Node "); 978 979 if (nodeCur.isNullable()) 980 System.out.print("Nullable "); 981 982 System.out.print("firstPos="); 983 System.out.print(nodeCur.firstPos().toString()); 984 System.out.print(" lastPos="); 985 System.out.println(nodeCur.lastPos().toString()); 986 987 dumpTree(((XSCMBinOp)nodeCur).getLeft(), level+1); 988 dumpTree(((XSCMBinOp)nodeCur).getRight(), level+1); 989 break; 990 } 991 case XSParticleDecl.PARTICLE_ZERO_OR_MORE: 992 case XSParticleDecl.PARTICLE_ONE_OR_MORE: 993 case XSParticleDecl.PARTICLE_ZERO_OR_ONE: { 994 System.out.print("Rep Node "); 995 996 if (nodeCur.isNullable()) 997 System.out.print("Nullable "); 998 999 System.out.print("firstPos="); 1000 System.out.print(nodeCur.firstPos().toString()); 1001 System.out.print(" lastPos="); 1002 System.out.println(nodeCur.lastPos().toString()); 1003 1004 dumpTree(((XSCMUniOp)nodeCur).getChild(), level+1); 1005 break; 1006 } 1007 case XSParticleDecl.PARTICLE_ELEMENT: { 1008 System.out.print 1009 ( 1010 "Leaf: (pos=" 1011 + ((XSCMLeaf)nodeCur).getPosition() 1012 + "), " 1013 + "(elemIndex=" 1014 + ((XSCMLeaf)nodeCur).getLeaf() 1015 + ") " 1016 ); 1017 1018 if (nodeCur.isNullable()) 1019 System.out.print(" Nullable "); 1020 1021 System.out.print("firstPos="); 1022 System.out.print(nodeCur.firstPos().toString()); 1023 System.out.print(" lastPos="); 1024 System.out.println(nodeCur.lastPos().toString()); 1025 break; 1026 } 1027 case XSParticleDecl.PARTICLE_WILDCARD: 1028 System.out.print("Any Node: "); 1029 1030 System.out.print("firstPos="); 1031 System.out.print(nodeCur.firstPos().toString()); 1032 System.out.print(" lastPos="); 1033 System.out.println(nodeCur.lastPos().toString()); 1034 break; 1035 default: { 1036 throw new RuntimeException("ImplementationMessages.VAL_NIICM"); 1037 } 1038 } 1039 1040 } 1041 1042 1043 /** 1044 * -1 is used to represent bad transitions in the transition table 1045 * entry for each state. So each entry is initialized to an all -1 1046 * array. This method creates a new entry and initializes it. 1047 */ 1048 private int[] makeDefStateList() 1049 { 1050 int[] retArray = new int[fElemMapSize]; 1051 for (int index = 0; index < fElemMapSize; index++) 1052 retArray[index] = -1; 1053 return retArray; 1054 } 1055 1056 /** Post tree build initialization. */ 1057 private void postTreeBuildInit(CMNode nodeCur) throws RuntimeException { 1058 // Set the maximum states on this node 1059 nodeCur.setMaxStates(fLeafCount); 1060 1061 XSCMLeaf leaf = null; 1062 int pos = 0; 1063 // Recurse as required 1064 if (nodeCur.type() == XSParticleDecl.PARTICLE_WILDCARD) { 1065 leaf = (XSCMLeaf)nodeCur; 1066 pos = leaf.getPosition(); 1067 fLeafList[pos] = leaf; 1068 fLeafListType[pos] = XSParticleDecl.PARTICLE_WILDCARD; 1069 } 1070 else if ((nodeCur.type() == XSModelGroupImpl.MODELGROUP_CHOICE) || 1071 (nodeCur.type() == XSModelGroupImpl.MODELGROUP_SEQUENCE)) { 1072 postTreeBuildInit(((XSCMBinOp)nodeCur).getLeft()); 1073 postTreeBuildInit(((XSCMBinOp)nodeCur).getRight()); 1074 } 1075 else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_MORE || 1076 nodeCur.type() == XSParticleDecl.PARTICLE_ONE_OR_MORE || 1077 nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_ONE) { 1078 postTreeBuildInit(((XSCMUniOp)nodeCur).getChild()); 1079 } 1080 else if (nodeCur.type() == XSParticleDecl.PARTICLE_ELEMENT) { 1081 // Put this node in the leaf list at the current index if its 1082 // a non-epsilon leaf. 1083 leaf = (XSCMLeaf)nodeCur; 1084 pos = leaf.getPosition(); 1085 fLeafList[pos] = leaf; 1086 fLeafListType[pos] = XSParticleDecl.PARTICLE_ELEMENT; 1087 } 1088 else { 1089 throw new RuntimeException("ImplementationMessages.VAL_NIICM"); 1090 } 1091 } 1092 1093 /** 1094 * check whether this content violates UPA constraint. 1095 * 1096 * @param subGroupHandler the substitution group handler 1097 * @return true if this content model contains other or list wildcard 1098 */ 1099 public boolean checkUniqueParticleAttribution(SubstitutionGroupHandler subGroupHandler) throws XMLSchemaException { 1100 // Unique Particle Attribution 1101 // store the conflict results between any two elements in fElemMap 1102 // 0: not compared; -1: no conflict; 1: conflict 1103 // initialize the conflict table (all 0 initially) 1104 byte conflictTable[][] = new byte[fElemMapSize][fElemMapSize]; 1105 1106 // for each state, check whether it has overlap transitions 1107 for (int i = 0; i < fTransTable.length && fTransTable[i] != null; i++) { 1108 for (int j = 0; j < fElemMapSize; j++) { 1109 for (int k = j+1; k < fElemMapSize; k++) { 1110 if (fTransTable[i][j] != -1 && 1111 fTransTable[i][k] != -1) { 1112 if (conflictTable[j][k] == 0) { 1113 if (XSConstraints.overlapUPA 1114 (fElemMap[j], fElemMap[k], 1115 subGroupHandler)) { 1116 if (fCountingStates != null) { 1117 Occurence o = fCountingStates[i]; 1118 // If "i" is a counting state and exactly one of the transitions 1119 // loops back to "i" then the two particles do not overlap if 1120 // minOccurs == maxOccurs. 1121 if (o != null && 1122 fTransTable[i][j] == i ^ fTransTable[i][k] == i && 1123 o.minOccurs == o.maxOccurs) { 1124 conflictTable[j][k] = (byte) -1; 1125 continue; 1126 } 1127 } 1128 conflictTable[j][k] = (byte) 1; 1129 } 1130 else { 1131 conflictTable[j][k] = (byte) -1; 1132 } 1133 } 1134 } 1135 } 1136 } 1137 } 1138 1139 // report all errors 1140 for (int i = 0; i < fElemMapSize; i++) { 1141 for (int j = 0; j < fElemMapSize; j++) { 1142 if (conflictTable[i][j] == 1) { 1143 //errors.newError("cos-nonambig", new Object[]{fElemMap[i].toString(), 1144 // fElemMap[j].toString()}); 1145 // REVISIT: do we want to report all errors? or just one? 1146 throw new XMLSchemaException("cos-nonambig", new Object[]{fElemMap[i].toString(), 1147 fElemMap[j].toString()}); 1148 } 1149 } 1150 } 1151 1152 // if there is a other or list wildcard, we need to check this CM 1153 // again, if this grammar is cached. 1154 for (int i = 0; i < fElemMapSize; i++) { 1155 if (fElemMapType[i] == XSParticleDecl.PARTICLE_WILDCARD) { 1156 XSWildcardDecl wildcard = (XSWildcardDecl)fElemMap[i]; 1157 if (wildcard.fType == XSWildcardDecl.NSCONSTRAINT_LIST || 1158 wildcard.fType == XSWildcardDecl.NSCONSTRAINT_NOT) { 1159 return true; 1160 } 1161 } 1162 } 1163 1164 return false; 1165 } 1166 1167 /** 1168 * Check which elements are valid to appear at this point. This method also 1169 * works if the state is in error, in which case it returns what should 1170 * have been seen. 1171 * 1172 * @param state the current state 1173 * @return a list whose entries are instances of 1174 * either XSWildcardDecl or XSElementDecl. 1175 */ 1176 public List<Object> whatCanGoHere(int[] state) { 1177 int curState = state[0]; 1178 if (curState < 0) 1179 curState = state[1]; 1180 Occurence o = (fCountingStates != null) ? 1181 fCountingStates[curState] : null; 1182 int count = state[2]; 1183 1184 // Can be XSElementDecl or XSWildcardDecl, but eventually the content is 1185 // only used to evaluate toString 1186 List<Object> ret = new ArrayList<>(); 1187 for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { 1188 int nextState = fTransTable[curState][elemIndex]; 1189 if (nextState != -1) { 1190 if (o != null) { 1191 if (curState == nextState) { 1192 // Do not include transitions which loop back to the 1193 // current state if we've looped the maximum number 1194 // of times or greater. 1195 if (count >= o.maxOccurs && 1196 o.maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED) { 1197 continue; 1198 } 1199 } 1200 // Do not include transitions which advance past the 1201 // current state if we have not looped enough times. 1202 else if (count < o.minOccurs) { 1203 continue; 1204 } 1205 } 1206 ret.add(fElemMap[elemIndex]); 1207 } 1208 } 1209 return ret; 1210 } 1211 1212 /** 1213 * Used by constant space algorithm for a{n,m} for n > 1 and 1214 * m <= unbounded. Called by a validator if validation of 1215 * countent model succeeds after subsuming a{n,m} to a* 1216 * (or a+) to check the n and m bounds. 1217 * Returns <code>null</code> if validation of bounds is 1218 * successful. Returns a list of strings with error info 1219 * if not. Even entries in list returned are error codes 1220 * (used to look up properties) and odd entries are parameters 1221 * to be passed when formatting error message. Each parameter 1222 * is associated with the error code that preceeds it in 1223 * the list. 1224 */ 1225 public List<String> checkMinMaxBounds() { 1226 List<String> result = null; 1227 for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { 1228 int count = fElemMapCounter[elemIndex]; 1229 if (count == -1) { 1230 continue; 1231 } 1232 final int minOccurs = fElemMapCounterLowerBound[elemIndex]; 1233 final int maxOccurs = fElemMapCounterUpperBound[elemIndex]; 1234 if (count < minOccurs) { 1235 if (result == null) result = new ArrayList<>(); 1236 result.add("cvc-complex-type.2.4.b"); 1237 result.add("{" + fElemMap[elemIndex] + "}"); 1238 } 1239 if (maxOccurs != -1 && count > maxOccurs) { 1240 if (result == null) result = new ArrayList<>(); 1241 result.add("cvc-complex-type.2.4.d.1"); 1242 result.add("{" + fElemMap[elemIndex] + "}"); 1243 } 1244 } 1245 return result; 1246 } 1247 1248 public int [] occurenceInfo(int[] state) { 1249 if (fCountingStates != null) { 1250 int curState = state[0]; 1251 if (curState < 0) { 1252 curState = state[1]; 1253 } 1254 Occurence o = fCountingStates[curState]; 1255 if (o != null) { 1256 int [] occurenceInfo = new int[4]; 1257 occurenceInfo[0] = o.minOccurs; 1258 occurenceInfo[1] = o.maxOccurs; 1259 occurenceInfo[2] = state[2]; 1260 occurenceInfo[3] = o.elemIndex; 1261 return occurenceInfo; 1262 } 1263 } 1264 return null; 1265 } 1266 1267 public String getTermName(int termId) { 1268 Object term = fElemMap[termId]; 1269 return (term != null) ? term.toString() : null; 1270 } 1271 1272 public boolean isCompactedForUPA() { 1273 return fIsCompactedForUPA; 1274 } 1275 } // class DFAContentModel