1 /* 2 * Copyright (c) 2006, 2009, Oracle and/or its affiliates. All rights reserved. 3 */ 4 /* 5 * Licensed to the Apache Software Foundation (ASF) under one or more 6 * contributor license agreements. See the NOTICE file distributed with 7 * this work for additional information regarding copyright ownership. 8 * The ASF licenses this file to You under the Apache License, Version 2.0 9 * (the "License"); you may not use this file except in compliance with 10 * the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xerces.internal.impl.xs.models; 22 23 import com.sun.org.apache.xerces.internal.xni.QName; 24 import com.sun.org.apache.xerces.internal.impl.dtd.models.CMNode; 25 import com.sun.org.apache.xerces.internal.impl.dtd.models.CMStateSet; 26 import com.sun.org.apache.xerces.internal.impl.xs.SchemaSymbols; 27 import com.sun.org.apache.xerces.internal.impl.xs.SubstitutionGroupHandler; 28 import com.sun.org.apache.xerces.internal.impl.xs.XSElementDecl; 29 import com.sun.org.apache.xerces.internal.impl.xs.XSParticleDecl; 30 import com.sun.org.apache.xerces.internal.impl.xs.XSModelGroupImpl; 31 import com.sun.org.apache.xerces.internal.impl.xs.XSWildcardDecl; 32 import com.sun.org.apache.xerces.internal.impl.xs.XMLSchemaException; 33 import com.sun.org.apache.xerces.internal.impl.xs.XSConstraints; 34 import java.util.ArrayList; 35 import java.util.HashMap; 36 37 /** 38 * DFAContentModel is the implementation of XSCMValidator that does 39 * all of the non-trivial element content validation. This class does 40 * the conversion from the regular expression to the DFA that 41 * it then uses in its validation algorithm. 42 * 43 * @xerces.internal 44 * 45 * @author Neil Graham, IBM 46 */ 47 public class XSDFACM 48 implements XSCMValidator { 49 50 // 51 // Constants 52 // 53 private static final boolean DEBUG = false; 54 55 // special strings 56 57 // debugging 58 59 /** Set to true to debug content model validation. */ 60 private static final boolean DEBUG_VALIDATE_CONTENT = false; 61 62 // 63 // Data 64 // 65 66 /** 67 * This is the map of unique input symbol elements to indices into 68 * each state's per-input symbol transition table entry. This is part 69 * of the built DFA information that must be kept around to do the 70 * actual validation. Note tat since either XSElementDecl or XSParticleDecl object 71 * can live here, we've got to use an Object. 72 */ 73 private Object fElemMap[] = null; 74 75 /** 76 * This is a map of whether the element map contains information 77 * related to ANY models. 78 */ 79 private int fElemMapType[] = null; 80 81 /** 82 * id of the unique input symbol 83 */ 84 private int fElemMapId[] = null; 85 86 /** The element map size. */ 87 private int fElemMapSize = 0; 88 89 /** 90 * This is an array of booleans, one per state (there are 91 * fTransTableSize states in the DFA) that indicates whether that 92 * state is a final state. 93 */ 94 private boolean fFinalStateFlags[] = null; 95 96 /** 97 * The list of follow positions for each NFA position (i.e. for each 98 * non-epsilon leaf node.) This is only used during the building of 99 * the DFA, and is let go afterwards. 100 */ 101 private CMStateSet fFollowList[] = null; 102 103 /** 104 * This is the head node of our intermediate representation. It is 105 * only non-null during the building of the DFA (just so that it 106 * does not have to be passed all around.) Once the DFA is built, 107 * this is no longer required so its nulled out. 108 */ 109 private CMNode fHeadNode = null; 110 111 /** 112 * The count of leaf nodes. This is an important number that set some 113 * limits on the sizes of data structures in the DFA process. 114 */ 115 private int fLeafCount = 0; 116 117 /** 118 * An array of non-epsilon leaf nodes, which is used during the DFA 119 * build operation, then dropped. 120 */ 121 private XSCMLeaf fLeafList[] = null; 122 123 /** Array mapping ANY types to the leaf list. */ 124 private int fLeafListType[] = null; 125 126 /** 127 * This is the transition table that is the main by product of all 128 * of the effort here. It is an array of arrays of ints. The first 129 * dimension is the number of states we end up with in the DFA. The 130 * second dimensions is the number of unique elements in the content 131 * model (fElemMapSize). Each entry in the second dimension indicates 132 * the new state given that input for the first dimension's start 133 * state. 134 * <p> 135 * The fElemMap array handles mapping from element indexes to 136 * positions in the second dimension of the transition table. 137 */ 138 private int fTransTable[][] = null; 139 /** 140 * Array containing occurence information for looping states 141 * which use counters to check minOccurs/maxOccurs. 142 */ 143 private Occurence [] fCountingStates = null; 144 static final class Occurence { 145 final int minOccurs; 146 final int maxOccurs; 147 final int elemIndex; 148 public Occurence (XSCMRepeatingLeaf leaf, int elemIndex) { 149 minOccurs = leaf.getMinOccurs(); 150 maxOccurs = leaf.getMaxOccurs(); 151 this.elemIndex = elemIndex; 152 } 153 public String toString() { 154 return "minOccurs=" + minOccurs 155 + ";maxOccurs=" + 156 ((maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED) 157 ? Integer.toString(maxOccurs) : "unbounded"); 158 } 159 } 160 161 /** 162 * The number of valid entries in the transition table, and in the other 163 * related tables such as fFinalStateFlags. 164 */ 165 private int fTransTableSize = 0; 166 167 private boolean fIsCompactedForUPA; 168 169 /** 170 * Array of counters for all the for elements (or wildcards) 171 * of the form a{n,m} where n > 1 and m <= unbounded. Used 172 * to count the a's to later check against n and m. Counter 173 * set to -1 if element (or wildcard) not optimized by 174 * constant space algorithm. 175 */ 176 private int fElemMapCounter[]; 177 178 /** 179 * Array of lower bounds for all the for elements (or wildcards) 180 * of the form a{n,m} where n > 1 and m <= unbounded. This array 181 * stores the n's for those elements (or wildcards) for which 182 * the constant space algorithm applies (or -1 otherwise). 183 */ 184 private int fElemMapCounterLowerBound[]; 185 186 /** 187 * Array of upper bounds for all the for elements (or wildcards) 188 * of the form a{n,m} where n > 1 and m <= unbounded. This array 189 * stores the n's for those elements (or wildcards) for which 190 * the constant space algorithm applies, or -1 if algorithm does 191 * not apply or m = unbounded. 192 */ 193 private int fElemMapCounterUpperBound[]; // -1 if no upper bound 194 195 // temp variables 196 197 // 198 // Constructors 199 // 200 201 /** 202 * Constructs a DFA content model. 203 * 204 * @param syntaxTree The syntax tree of the content model. 205 * @param leafCount The number of leaves. 206 * 207 * @exception RuntimeException Thrown if DFA can't be built. 208 */ 209 210 public XSDFACM(CMNode syntaxTree, int leafCount) { 211 212 // Store away our index and pools in members 213 fLeafCount = leafCount; 214 215 // 216 // Create some string pool indexes that represent the names of some 217 // magical nodes in the syntax tree. 218 // (already done in static initialization... 219 // 220 221 // 222 // Ok, so lets grind through the building of the DFA. This method 223 // handles the high level logic of the algorithm, but it uses a 224 // number of helper classes to do its thing. 225 // 226 // In order to avoid having hundreds of references to the error and 227 // string handlers around, this guy and all of his helper classes 228 // just throw a simple exception and we then pass it along. 229 // 230 231 if(DEBUG_VALIDATE_CONTENT) { 232 XSDFACM.time -= System.currentTimeMillis(); 233 } 234 235 buildDFA(syntaxTree); 236 237 if(DEBUG_VALIDATE_CONTENT) { 238 XSDFACM.time += System.currentTimeMillis(); 239 System.out.println("DFA build: " + XSDFACM.time + "ms"); 240 } 241 } 242 243 private static long time = 0; 244 245 // 246 // XSCMValidator methods 247 // 248 249 /** 250 * check whether the given state is one of the final states 251 * 252 * @param state the state to check 253 * 254 * @return whether it's a final state 255 */ 256 public boolean isFinalState (int state) { 257 return (state < 0)? false : 258 fFinalStateFlags[state]; 259 } 260 261 /** 262 * one transition only 263 * 264 * @param curElem The current element's QName 265 * @param state stack to store the previous state 266 * @param subGroupHandler the substitution group handler 267 * 268 * @return null if transition is invalid; otherwise the Object corresponding to the 269 * XSElementDecl or XSWildcardDecl identified. Also, the 270 * state array will be modified to include the new state; this so that the validator can 271 * store it away. 272 * 273 * @exception RuntimeException thrown on error 274 */ 275 public Object oneTransition(QName curElem, int[] state, SubstitutionGroupHandler subGroupHandler) { 276 int curState = state[0]; 277 278 if(curState == XSCMValidator.FIRST_ERROR || curState == XSCMValidator.SUBSEQUENT_ERROR) { 279 // there was an error last time; so just go find correct Object in fElemmMap. 280 // ... after resetting state[0]. 281 if(curState == XSCMValidator.FIRST_ERROR) 282 state[0] = XSCMValidator.SUBSEQUENT_ERROR; 283 284 return findMatchingDecl(curElem, subGroupHandler); 285 } 286 287 int nextState = 0; 288 int elemIndex = 0; 289 Object matchingDecl = null; 290 291 for (; elemIndex < fElemMapSize; elemIndex++) { 292 nextState = fTransTable[curState][elemIndex]; 293 if (nextState == -1) 294 continue; 295 int type = fElemMapType[elemIndex] ; 296 if (type == XSParticleDecl.PARTICLE_ELEMENT) { 297 matchingDecl = subGroupHandler.getMatchingElemDecl(curElem, (XSElementDecl)fElemMap[elemIndex]); 298 if (matchingDecl != null) { 299 // Increment counter if constant space algorithm applies 300 if (fElemMapCounter[elemIndex] >= 0) { 301 fElemMapCounter[elemIndex]++; 302 } 303 break; 304 } 305 } 306 else if (type == XSParticleDecl.PARTICLE_WILDCARD) { 307 if (((XSWildcardDecl)fElemMap[elemIndex]).allowNamespace(curElem.uri)) { 308 matchingDecl = fElemMap[elemIndex]; 309 // Increment counter if constant space algorithm applies 310 if (fElemMapCounter[elemIndex] >= 0) { 311 fElemMapCounter[elemIndex]++; 312 } 313 break; 314 } 315 } 316 } 317 318 // if we still can't find a match, set the state to first_error 319 // and return null 320 if (elemIndex == fElemMapSize) { 321 state[1] = state[0]; 322 state[0] = XSCMValidator.FIRST_ERROR; 323 return findMatchingDecl(curElem, subGroupHandler); 324 } 325 326 if (fCountingStates != null) { 327 Occurence o = fCountingStates[curState]; 328 if (o != null) { 329 if (curState == nextState) { 330 if (++state[2] > o.maxOccurs && 331 o.maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED) { 332 // It's likely that we looped too many times on the current state 333 // however it's possible that we actually matched another particle 334 // which allows the same name. 335 // 336 // Consider: 337 // 338 // <xs:sequence> 339 // <xs:element name="foo" type="xs:string" minOccurs="3" maxOccurs="3"/> 340 // <xs:element name="foo" type="xs:string" fixed="bar"/> 341 // </xs:sequence> 342 // 343 // and 344 // 345 // <xs:sequence> 346 // <xs:element name="foo" type="xs:string" minOccurs="3" maxOccurs="3"/> 347 // <xs:any namespace="##any" processContents="skip"/> 348 // </xs:sequence> 349 // 350 // In the DFA there will be two transitions from the current state which 351 // allow "foo". Note that this is not a UPA violation. The ambiguity of which 352 // transition to take is resolved by the current value of the counter. Since 353 // we've already seen enough instances of the first "foo" perhaps there is 354 // another element declaration or wildcard deeper in the element map which 355 // matches. 356 return findMatchingDecl(curElem, state, subGroupHandler, elemIndex); 357 } 358 } 359 else if (state[2] < o.minOccurs) { 360 // not enough loops on the current state. 361 state[1] = state[0]; 362 state[0] = XSCMValidator.FIRST_ERROR; 363 return findMatchingDecl(curElem, subGroupHandler); 364 } 365 else { 366 // Exiting a counting state. If we're entering a new 367 // counting state, reset the counter. 368 o = fCountingStates[nextState]; 369 if (o != null) { 370 state[2] = (elemIndex == o.elemIndex) ? 1 : 0; 371 } 372 } 373 } 374 else { 375 o = fCountingStates[nextState]; 376 if (o != null) { 377 // Entering a new counting state. Reset the counter. 378 // If we've already seen one instance of the looping 379 // particle set the counter to 1, otherwise set it 380 // to 0. 381 state[2] = (elemIndex == o.elemIndex) ? 1 : 0; 382 } 383 } 384 } 385 386 state[0] = nextState; 387 return matchingDecl; 388 } // oneTransition(QName, int[], SubstitutionGroupHandler): Object 389 390 Object findMatchingDecl(QName curElem, SubstitutionGroupHandler subGroupHandler) { 391 Object matchingDecl = null; 392 393 for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { 394 int type = fElemMapType[elemIndex] ; 395 if (type == XSParticleDecl.PARTICLE_ELEMENT) { 396 matchingDecl = subGroupHandler.getMatchingElemDecl(curElem, (XSElementDecl)fElemMap[elemIndex]); 397 if (matchingDecl != null) { 398 return matchingDecl; 399 } 400 } 401 else if (type == XSParticleDecl.PARTICLE_WILDCARD) { 402 if(((XSWildcardDecl)fElemMap[elemIndex]).allowNamespace(curElem.uri)) 403 return fElemMap[elemIndex]; 404 } 405 } 406 407 return null; 408 } // findMatchingDecl(QName, SubstitutionGroupHandler): Object 409 410 Object findMatchingDecl(QName curElem, int[] state, SubstitutionGroupHandler subGroupHandler, int elemIndex) { 411 412 int curState = state[0]; 413 int nextState = 0; 414 Object matchingDecl = null; 415 416 while (++elemIndex < fElemMapSize) { 417 nextState = fTransTable[curState][elemIndex]; 418 if (nextState == -1) 419 continue; 420 int type = fElemMapType[elemIndex] ; 421 if (type == XSParticleDecl.PARTICLE_ELEMENT) { 422 matchingDecl = subGroupHandler.getMatchingElemDecl(curElem, (XSElementDecl)fElemMap[elemIndex]); 423 if (matchingDecl != null) { 424 break; 425 } 426 } 427 else if (type == XSParticleDecl.PARTICLE_WILDCARD) { 428 if (((XSWildcardDecl)fElemMap[elemIndex]).allowNamespace(curElem.uri)) { 429 matchingDecl = fElemMap[elemIndex]; 430 break; 431 } 432 } 433 } 434 435 // if we still can't find a match, set the state to FIRST_ERROR and return null 436 if (elemIndex == fElemMapSize) { 437 state[1] = state[0]; 438 state[0] = XSCMValidator.FIRST_ERROR; 439 return findMatchingDecl(curElem, subGroupHandler); 440 } 441 442 // if we found a match, set the next state and reset the 443 // counter if the next state is a counting state. 444 state[0] = nextState; 445 final Occurence o = fCountingStates[nextState]; 446 if (o != null) { 447 state[2] = (elemIndex == o.elemIndex) ? 1 : 0; 448 } 449 return matchingDecl; 450 } // findMatchingDecl(QName, int[], SubstitutionGroupHandler, int): Object 451 452 // This method returns the start states of the content model. 453 public int[] startContentModel() { 454 // Clear all constant space algorithm counters in use 455 for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { 456 if (fElemMapCounter[elemIndex] != -1) { 457 fElemMapCounter[elemIndex] = 0; 458 } 459 } 460 // [0] : the current state 461 // [1] : if [0] is an error state then the 462 // last valid state before the error 463 // [2] : occurence counter for counting states 464 return new int [3]; 465 } // startContentModel():int[] 466 467 // this method returns whether the last state was a valid final state 468 public boolean endContentModel(int[] state) { 469 final int curState = state[0]; 470 if (fFinalStateFlags[curState]) { 471 if (fCountingStates != null) { 472 Occurence o = fCountingStates[curState]; 473 if (o != null && state[2] < o.minOccurs) { 474 // not enough loops on the current state to be considered final. 475 return false; 476 } 477 } 478 return true; 479 } 480 return false; 481 } // endContentModel(int[]): boolean 482 483 // Killed off whatCanGoHere; we may need it for DOM canInsert(...) etc., 484 // but we can put it back later. 485 486 // 487 // Private methods 488 // 489 490 /** 491 * Builds the internal DFA transition table from the given syntax tree. 492 * 493 * @param syntaxTree The syntax tree. 494 * 495 * @exception RuntimeException Thrown if DFA cannot be built. 496 */ 497 private void buildDFA(CMNode syntaxTree) { 498 // 499 // The first step we need to take is to rewrite the content model 500 // using our CMNode objects, and in the process get rid of any 501 // repetition short cuts, converting them into '*' style repetitions 502 // or getting rid of repetitions altogether. 503 // 504 // The conversions done are: 505 // 506 // x+ -> (x|x*) 507 // x? -> (x|epsilon) 508 // 509 // This is a relatively complex scenario. What is happening is that 510 // we create a top level binary node of which the special EOC value 511 // is set as the right side node. The the left side is set to the 512 // rewritten syntax tree. The source is the original content model 513 // info from the decl pool. The rewrite is done by buildSyntaxTree() 514 // which recurses the decl pool's content of the element and builds 515 // a new tree in the process. 516 // 517 // Note that, during this operation, we set each non-epsilon leaf 518 // node's DFA state position and count the number of such leafs, which 519 // is left in the fLeafCount member. 520 // 521 // The nodeTmp object is passed in just as a temp node to use during 522 // the recursion. Otherwise, we'd have to create a new node on every 523 // level of recursion, which would be piggy in Java (as is everything 524 // for that matter.) 525 // 526 527 /* MODIFIED (Jan, 2001) 528 * 529 * Use following rules. 530 * nullable(x+) := nullable(x), first(x+) := first(x), last(x+) := last(x) 531 * nullable(x?) := true, first(x?) := first(x), last(x?) := last(x) 532 * 533 * The same computation of follow as x* is applied to x+ 534 * 535 * The modification drastically reduces computation time of 536 * "(a, (b, a+, (c, (b, a+)+, a+, (d, (c, (b, a+)+, a+)+, (b, a+)+, a+)+)+)+)+" 537 */ 538 539 // 540 // And handle specially the EOC node, which also must be numbered 541 // and counted as a non-epsilon leaf node. It could not be handled 542 // in the above tree build because it was created before all that 543 // started. We save the EOC position since its used during the DFA 544 // building loop. 545 // 546 int EOCPos = fLeafCount; 547 XSCMLeaf nodeEOC = new XSCMLeaf(XSParticleDecl.PARTICLE_ELEMENT, null, -1, fLeafCount++); 548 fHeadNode = new XSCMBinOp( 549 XSModelGroupImpl.MODELGROUP_SEQUENCE, 550 syntaxTree, 551 nodeEOC 552 ); 553 554 // 555 // Ok, so now we have to iterate the new tree and do a little more 556 // work now that we know the leaf count. One thing we need to do is 557 // to calculate the first and last position sets of each node. This 558 // is cached away in each of the nodes. 559 // 560 // Along the way we also set the leaf count in each node as the 561 // maximum state count. They must know this in order to create their 562 // first/last pos sets. 563 // 564 // We also need to build an array of references to the non-epsilon 565 // leaf nodes. Since we iterate it in the same way as before, this 566 // will put them in the array according to their position values. 567 // 568 fLeafList = new XSCMLeaf[fLeafCount]; 569 fLeafListType = new int[fLeafCount]; 570 postTreeBuildInit(fHeadNode); 571 572 // 573 // And, moving onward... We now need to build the follow position 574 // sets for all the nodes. So we allocate an array of state sets, 575 // one for each leaf node (i.e. each DFA position.) 576 // 577 fFollowList = new CMStateSet[fLeafCount]; 578 for (int index = 0; index < fLeafCount; index++) 579 fFollowList[index] = new CMStateSet(fLeafCount); 580 calcFollowList(fHeadNode); 581 // 582 // And finally the big push... Now we build the DFA using all the 583 // states and the tree we've built up. First we set up the various 584 // data structures we are going to use while we do this. 585 // 586 // First of all we need an array of unique element names in our 587 // content model. For each transition table entry, we need a set of 588 // contiguous indices to represent the transitions for a particular 589 // input element. So we need to a zero based range of indexes that 590 // map to element types. This element map provides that mapping. 591 // 592 fElemMap = new Object[fLeafCount]; 593 fElemMapType = new int[fLeafCount]; 594 fElemMapId = new int[fLeafCount]; 595 596 fElemMapCounter = new int[fLeafCount]; 597 fElemMapCounterLowerBound = new int[fLeafCount]; 598 fElemMapCounterUpperBound = new int[fLeafCount]; 599 600 fElemMapSize = 0; 601 Occurence [] elemOccurenceMap = null; 602 603 for (int outIndex = 0; outIndex < fLeafCount; outIndex++) { 604 // optimization from Henry Zongaro: 605 //fElemMap[outIndex] = new Object (); 606 fElemMap[outIndex] = null; 607 608 int inIndex = 0; 609 final int id = fLeafList[outIndex].getParticleId(); 610 for (; inIndex < fElemMapSize; inIndex++) { 611 if (id == fElemMapId[inIndex]) 612 break; 613 } 614 615 // If it was not in the list, then add it, if not the EOC node 616 if (inIndex == fElemMapSize) { 617 XSCMLeaf leaf = fLeafList[outIndex]; 618 fElemMap[fElemMapSize] = leaf.getLeaf(); 619 if (leaf instanceof XSCMRepeatingLeaf) { 620 if (elemOccurenceMap == null) { 621 elemOccurenceMap = new Occurence[fLeafCount]; 622 } 623 elemOccurenceMap[fElemMapSize] = new Occurence((XSCMRepeatingLeaf) leaf, fElemMapSize); 624 } 625 626 fElemMapType[fElemMapSize] = fLeafListType[outIndex]; 627 fElemMapId[fElemMapSize] = id; 628 629 // Init counters and bounds for a{n,m} algorithm 630 int[] bounds = (int[]) leaf.getUserData(); 631 if (bounds != null) { 632 fElemMapCounter[fElemMapSize] = 0; 633 fElemMapCounterLowerBound[fElemMapSize] = bounds[0]; 634 fElemMapCounterUpperBound[fElemMapSize] = bounds[1]; 635 } else { 636 fElemMapCounter[fElemMapSize] = -1; 637 fElemMapCounterLowerBound[fElemMapSize] = -1; 638 fElemMapCounterUpperBound[fElemMapSize] = -1; 639 } 640 641 fElemMapSize++; 642 } 643 } 644 645 // the last entry in the element map must be the EOC element. 646 // remove it from the map. 647 if (DEBUG) { 648 if (fElemMapId[fElemMapSize-1] != -1) 649 System.err.println("interal error in DFA: last element is not EOC."); 650 } 651 fElemMapSize--; 652 653 /*** 654 * Optimization(Jan, 2001); We sort fLeafList according to 655 * elemIndex which is *uniquely* associated to each leaf. 656 * We are *assuming* that each element appears in at least one leaf. 657 **/ 658 659 int[] fLeafSorter = new int[fLeafCount + fElemMapSize]; 660 int fSortCount = 0; 661 662 for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { 663 final int id = fElemMapId[elemIndex]; 664 for (int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) { 665 if (id == fLeafList[leafIndex].getParticleId()) 666 fLeafSorter[fSortCount++] = leafIndex; 667 } 668 fLeafSorter[fSortCount++] = -1; 669 } 670 671 /* Optimization(Jan, 2001) */ 672 673 // 674 // Next lets create some arrays, some that hold transient 675 // information during the DFA build and some that are permament. 676 // These are kind of sticky since we cannot know how big they will 677 // get, but we don't want to use any Java collections because of 678 // performance. 679 // 680 // Basically they will probably be about fLeafCount*2 on average, 681 // but can be as large as 2^(fLeafCount*2), worst case. So we start 682 // with fLeafCount*4 as a middle ground. This will be very unlikely 683 // to ever have to expand, though it if does, the overhead will be 684 // somewhat ugly. 685 // 686 int curArraySize = fLeafCount * 4; 687 CMStateSet[] statesToDo = new CMStateSet[curArraySize]; 688 fFinalStateFlags = new boolean[curArraySize]; 689 fTransTable = new int[curArraySize][]; 690 691 // 692 // Ok we start with the initial set as the first pos set of the 693 // head node (which is the seq node that holds the content model 694 // and the EOC node.) 695 // 696 CMStateSet setT = fHeadNode.firstPos(); 697 698 // 699 // Init our two state flags. Basically the unmarked state counter 700 // is always chasing the current state counter. When it catches up, 701 // that means we made a pass through that did not add any new states 702 // to the lists, at which time we are done. We could have used a 703 // expanding array of flags which we used to mark off states as we 704 // complete them, but this is easier though less readable maybe. 705 // 706 int unmarkedState = 0; 707 int curState = 0; 708 709 // 710 // Init the first transition table entry, and put the initial state 711 // into the states to do list, then bump the current state. 712 // 713 fTransTable[curState] = makeDefStateList(); 714 statesToDo[curState] = setT; 715 curState++; 716 717 /* Optimization(Jan, 2001); This is faster for 718 * a large content model such as, "(t001+|t002+|.... |t500+)". 719 */ 720 721 HashMap stateTable = new HashMap(); 722 723 /* Optimization(Jan, 2001) */ 724 725 // 726 // Ok, almost done with the algorithm... We now enter the 727 // loop where we go until the states done counter catches up with 728 // the states to do counter. 729 // 730 while (unmarkedState < curState) { 731 // 732 // Get the first unmarked state out of the list of states to do. 733 // And get the associated transition table entry. 734 // 735 setT = statesToDo[unmarkedState]; 736 int[] transEntry = fTransTable[unmarkedState]; 737 738 // Mark this one final if it contains the EOC state 739 fFinalStateFlags[unmarkedState] = setT.getBit(EOCPos); 740 741 // Bump up the unmarked state count, marking this state done 742 unmarkedState++; 743 744 // Loop through each possible input symbol in the element map 745 CMStateSet newSet = null; 746 /* Optimization(Jan, 2001) */ 747 int sorterIndex = 0; 748 /* Optimization(Jan, 2001) */ 749 for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { 750 // 751 // Build up a set of states which is the union of all of 752 // the follow sets of DFA positions that are in the current 753 // state. If we gave away the new set last time through then 754 // create a new one. Otherwise, zero out the existing one. 755 // 756 if (newSet == null) 757 newSet = new CMStateSet(fLeafCount); 758 else 759 newSet.zeroBits(); 760 761 /* Optimization(Jan, 2001) */ 762 int leafIndex = fLeafSorter[sorterIndex++]; 763 764 while (leafIndex != -1) { 765 // If this leaf index (DFA position) is in the current set... 766 if (setT.getBit(leafIndex)) { 767 // 768 // If this leaf is the current input symbol, then we 769 // want to add its follow list to the set of states to 770 // transition to from the current state. 771 // 772 newSet.union(fFollowList[leafIndex]); 773 } 774 775 leafIndex = fLeafSorter[sorterIndex++]; 776 } 777 /* Optimization(Jan, 2001) */ 778 779 // 780 // If this new set is not empty, then see if its in the list 781 // of states to do. If not, then add it. 782 // 783 if (!newSet.isEmpty()) { 784 // 785 // Search the 'states to do' list to see if this new 786 // state set is already in there. 787 // 788 789 /* Optimization(Jan, 2001) */ 790 Integer stateObj = (Integer)stateTable.get(newSet); 791 int stateIndex = (stateObj == null ? curState : stateObj.intValue()); 792 /* Optimization(Jan, 2001) */ 793 794 // If we did not find it, then add it 795 if (stateIndex == curState) { 796 // 797 // Put this new state into the states to do and init 798 // a new entry at the same index in the transition 799 // table. 800 // 801 statesToDo[curState] = newSet; 802 fTransTable[curState] = makeDefStateList(); 803 804 /* Optimization(Jan, 2001) */ 805 stateTable.put(newSet, new Integer(curState)); 806 /* Optimization(Jan, 2001) */ 807 808 // We now have a new state to do so bump the count 809 curState++; 810 811 // 812 // Null out the new set to indicate we adopted it. 813 // This will cause the creation of a new set on the 814 // next time around the loop. 815 // 816 newSet = null; 817 } 818 819 // 820 // Now set this state in the transition table's entry 821 // for this element (using its index), with the DFA 822 // state we will move to from the current state when we 823 // see this input element. 824 // 825 transEntry[elemIndex] = stateIndex; 826 827 // Expand the arrays if we're full 828 if (curState == curArraySize) { 829 // 830 // Yikes, we overflowed the initial array size, so 831 // we've got to expand all of these arrays. So adjust 832 // up the size by 50% and allocate new arrays. 833 // 834 final int newSize = (int)(curArraySize * 1.5); 835 CMStateSet[] newToDo = new CMStateSet[newSize]; 836 boolean[] newFinalFlags = new boolean[newSize]; 837 int[][] newTransTable = new int[newSize][]; 838 839 // Copy over all of the existing content 840 System.arraycopy(statesToDo, 0, newToDo, 0, curArraySize); 841 System.arraycopy(fFinalStateFlags, 0, newFinalFlags, 0, curArraySize); 842 System.arraycopy(fTransTable, 0, newTransTable, 0, curArraySize); 843 844 // Store the new array size 845 curArraySize = newSize; 846 statesToDo = newToDo; 847 fFinalStateFlags = newFinalFlags; 848 fTransTable = newTransTable; 849 } 850 } 851 } 852 } 853 854 // 855 // Fill in the occurence information for each looping state 856 // if we're using counters. 857 // 858 if (elemOccurenceMap != null) { 859 fCountingStates = new Occurence[curState]; 860 for (int i = 0; i < curState; ++i) { 861 int [] transitions = fTransTable[i]; 862 for (int j = 0; j < transitions.length; ++j) { 863 if (i == transitions[j]) { 864 fCountingStates[i] = elemOccurenceMap[j]; 865 break; 866 } 867 } 868 } 869 } 870 871 // 872 // And now we can say bye bye to the temp representation since we've 873 // built the DFA. 874 // 875 if (DEBUG_VALIDATE_CONTENT) 876 dumpTree(fHeadNode, 0); 877 fHeadNode = null; 878 fLeafList = null; 879 fFollowList = null; 880 fLeafListType = null; 881 fElemMapId = null; 882 } 883 884 /** 885 * Calculates the follow list of the current node. 886 * 887 * @param nodeCur The curent node. 888 * 889 * @exception RuntimeException Thrown if follow list cannot be calculated. 890 */ 891 private void calcFollowList(CMNode nodeCur) { 892 // Recurse as required 893 if (nodeCur.type() == XSModelGroupImpl.MODELGROUP_CHOICE) { 894 // Recurse only 895 calcFollowList(((XSCMBinOp)nodeCur).getLeft()); 896 calcFollowList(((XSCMBinOp)nodeCur).getRight()); 897 } 898 else if (nodeCur.type() == XSModelGroupImpl.MODELGROUP_SEQUENCE) { 899 // Recurse first 900 calcFollowList(((XSCMBinOp)nodeCur).getLeft()); 901 calcFollowList(((XSCMBinOp)nodeCur).getRight()); 902 903 // 904 // Now handle our level. We use our left child's last pos 905 // set and our right child's first pos set, so go ahead and 906 // get them ahead of time. 907 // 908 final CMStateSet last = ((XSCMBinOp)nodeCur).getLeft().lastPos(); 909 final CMStateSet first = ((XSCMBinOp)nodeCur).getRight().firstPos(); 910 911 // 912 // Now, for every position which is in our left child's last set 913 // add all of the states in our right child's first set to the 914 // follow set for that position. 915 // 916 for (int index = 0; index < fLeafCount; index++) { 917 if (last.getBit(index)) 918 fFollowList[index].union(first); 919 } 920 } 921 else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_MORE 922 || nodeCur.type() == XSParticleDecl.PARTICLE_ONE_OR_MORE) { 923 // Recurse first 924 calcFollowList(((XSCMUniOp)nodeCur).getChild()); 925 926 // 927 // Now handle our level. We use our own first and last position 928 // sets, so get them up front. 929 // 930 final CMStateSet first = nodeCur.firstPos(); 931 final CMStateSet last = nodeCur.lastPos(); 932 933 // 934 // For every position which is in our last position set, add all 935 // of our first position states to the follow set for that 936 // position. 937 // 938 for (int index = 0; index < fLeafCount; index++) { 939 if (last.getBit(index)) 940 fFollowList[index].union(first); 941 } 942 } 943 944 else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_ONE) { 945 // Recurse only 946 calcFollowList(((XSCMUniOp)nodeCur).getChild()); 947 } 948 949 } 950 951 /** 952 * Dumps the tree of the current node to standard output. 953 * 954 * @param nodeCur The current node. 955 * @param level The maximum levels to output. 956 * 957 * @exception RuntimeException Thrown on error. 958 */ 959 private void dumpTree(CMNode nodeCur, int level) { 960 for (int index = 0; index < level; index++) 961 System.out.print(" "); 962 963 int type = nodeCur.type(); 964 965 switch(type ) { 966 967 case XSModelGroupImpl.MODELGROUP_CHOICE: 968 case XSModelGroupImpl.MODELGROUP_SEQUENCE: { 969 if (type == XSModelGroupImpl.MODELGROUP_CHOICE) 970 System.out.print("Choice Node "); 971 else 972 System.out.print("Seq Node "); 973 974 if (nodeCur.isNullable()) 975 System.out.print("Nullable "); 976 977 System.out.print("firstPos="); 978 System.out.print(nodeCur.firstPos().toString()); 979 System.out.print(" lastPos="); 980 System.out.println(nodeCur.lastPos().toString()); 981 982 dumpTree(((XSCMBinOp)nodeCur).getLeft(), level+1); 983 dumpTree(((XSCMBinOp)nodeCur).getRight(), level+1); 984 break; 985 } 986 case XSParticleDecl.PARTICLE_ZERO_OR_MORE: 987 case XSParticleDecl.PARTICLE_ONE_OR_MORE: 988 case XSParticleDecl.PARTICLE_ZERO_OR_ONE: { 989 System.out.print("Rep Node "); 990 991 if (nodeCur.isNullable()) 992 System.out.print("Nullable "); 993 994 System.out.print("firstPos="); 995 System.out.print(nodeCur.firstPos().toString()); 996 System.out.print(" lastPos="); 997 System.out.println(nodeCur.lastPos().toString()); 998 999 dumpTree(((XSCMUniOp)nodeCur).getChild(), level+1); 1000 break; 1001 } 1002 case XSParticleDecl.PARTICLE_ELEMENT: { 1003 System.out.print 1004 ( 1005 "Leaf: (pos=" 1006 + ((XSCMLeaf)nodeCur).getPosition() 1007 + "), " 1008 + "(elemIndex=" 1009 + ((XSCMLeaf)nodeCur).getLeaf() 1010 + ") " 1011 ); 1012 1013 if (nodeCur.isNullable()) 1014 System.out.print(" Nullable "); 1015 1016 System.out.print("firstPos="); 1017 System.out.print(nodeCur.firstPos().toString()); 1018 System.out.print(" lastPos="); 1019 System.out.println(nodeCur.lastPos().toString()); 1020 break; 1021 } 1022 case XSParticleDecl.PARTICLE_WILDCARD: 1023 System.out.print("Any Node: "); 1024 1025 System.out.print("firstPos="); 1026 System.out.print(nodeCur.firstPos().toString()); 1027 System.out.print(" lastPos="); 1028 System.out.println(nodeCur.lastPos().toString()); 1029 break; 1030 default: { 1031 throw new RuntimeException("ImplementationMessages.VAL_NIICM"); 1032 } 1033 } 1034 1035 } 1036 1037 1038 /** 1039 * -1 is used to represent bad transitions in the transition table 1040 * entry for each state. So each entry is initialized to an all -1 1041 * array. This method creates a new entry and initializes it. 1042 */ 1043 private int[] makeDefStateList() 1044 { 1045 int[] retArray = new int[fElemMapSize]; 1046 for (int index = 0; index < fElemMapSize; index++) 1047 retArray[index] = -1; 1048 return retArray; 1049 } 1050 1051 /** Post tree build initialization. */ 1052 private void postTreeBuildInit(CMNode nodeCur) throws RuntimeException { 1053 // Set the maximum states on this node 1054 nodeCur.setMaxStates(fLeafCount); 1055 1056 XSCMLeaf leaf = null; 1057 int pos = 0; 1058 // Recurse as required 1059 if (nodeCur.type() == XSParticleDecl.PARTICLE_WILDCARD) { 1060 leaf = (XSCMLeaf)nodeCur; 1061 pos = leaf.getPosition(); 1062 fLeafList[pos] = leaf; 1063 fLeafListType[pos] = XSParticleDecl.PARTICLE_WILDCARD; 1064 } 1065 else if ((nodeCur.type() == XSModelGroupImpl.MODELGROUP_CHOICE) || 1066 (nodeCur.type() == XSModelGroupImpl.MODELGROUP_SEQUENCE)) { 1067 postTreeBuildInit(((XSCMBinOp)nodeCur).getLeft()); 1068 postTreeBuildInit(((XSCMBinOp)nodeCur).getRight()); 1069 } 1070 else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_MORE || 1071 nodeCur.type() == XSParticleDecl.PARTICLE_ONE_OR_MORE || 1072 nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_ONE) { 1073 postTreeBuildInit(((XSCMUniOp)nodeCur).getChild()); 1074 } 1075 else if (nodeCur.type() == XSParticleDecl.PARTICLE_ELEMENT) { 1076 // Put this node in the leaf list at the current index if its 1077 // a non-epsilon leaf. 1078 leaf = (XSCMLeaf)nodeCur; 1079 pos = leaf.getPosition(); 1080 fLeafList[pos] = leaf; 1081 fLeafListType[pos] = XSParticleDecl.PARTICLE_ELEMENT; 1082 } 1083 else { 1084 throw new RuntimeException("ImplementationMessages.VAL_NIICM"); 1085 } 1086 } 1087 1088 /** 1089 * check whether this content violates UPA constraint. 1090 * 1091 * @param subGroupHandler the substitution group handler 1092 * @return true if this content model contains other or list wildcard 1093 */ 1094 public boolean checkUniqueParticleAttribution(SubstitutionGroupHandler subGroupHandler) throws XMLSchemaException { 1095 // Unique Particle Attribution 1096 // store the conflict results between any two elements in fElemMap 1097 // 0: not compared; -1: no conflict; 1: conflict 1098 // initialize the conflict table (all 0 initially) 1099 byte conflictTable[][] = new byte[fElemMapSize][fElemMapSize]; 1100 1101 // for each state, check whether it has overlap transitions 1102 for (int i = 0; i < fTransTable.length && fTransTable[i] != null; i++) { 1103 for (int j = 0; j < fElemMapSize; j++) { 1104 for (int k = j+1; k < fElemMapSize; k++) { 1105 if (fTransTable[i][j] != -1 && 1106 fTransTable[i][k] != -1) { 1107 if (conflictTable[j][k] == 0) { 1108 if (XSConstraints.overlapUPA 1109 (fElemMap[j], fElemMap[k], 1110 subGroupHandler)) { 1111 if (fCountingStates != null) { 1112 Occurence o = fCountingStates[i]; 1113 // If "i" is a counting state and exactly one of the transitions 1114 // loops back to "i" then the two particles do not overlap if 1115 // minOccurs == maxOccurs. 1116 if (o != null && 1117 fTransTable[i][j] == i ^ fTransTable[i][k] == i && 1118 o.minOccurs == o.maxOccurs) { 1119 conflictTable[j][k] = (byte) -1; 1120 continue; 1121 } 1122 } 1123 conflictTable[j][k] = (byte) 1; 1124 } 1125 else { 1126 conflictTable[j][k] = (byte) -1; 1127 } 1128 } 1129 } 1130 } 1131 } 1132 } 1133 1134 // report all errors 1135 for (int i = 0; i < fElemMapSize; i++) { 1136 for (int j = 0; j < fElemMapSize; j++) { 1137 if (conflictTable[i][j] == 1) { 1138 //errors.newError("cos-nonambig", new Object[]{fElemMap[i].toString(), 1139 // fElemMap[j].toString()}); 1140 // REVISIT: do we want to report all errors? or just one? 1141 throw new XMLSchemaException("cos-nonambig", new Object[]{fElemMap[i].toString(), 1142 fElemMap[j].toString()}); 1143 } 1144 } 1145 } 1146 1147 // if there is a other or list wildcard, we need to check this CM 1148 // again, if this grammar is cached. 1149 for (int i = 0; i < fElemMapSize; i++) { 1150 if (fElemMapType[i] == XSParticleDecl.PARTICLE_WILDCARD) { 1151 XSWildcardDecl wildcard = (XSWildcardDecl)fElemMap[i]; 1152 if (wildcard.fType == XSWildcardDecl.NSCONSTRAINT_LIST || 1153 wildcard.fType == XSWildcardDecl.NSCONSTRAINT_NOT) { 1154 return true; 1155 } 1156 } 1157 } 1158 1159 return false; 1160 } 1161 1162 /** 1163 * Check which elements are valid to appear at this point. This method also 1164 * works if the state is in error, in which case it returns what should 1165 * have been seen. 1166 * 1167 * @param state the current state 1168 * @return a list whose entries are instances of 1169 * either XSWildcardDecl or XSElementDecl. 1170 */ 1171 public ArrayList whatCanGoHere(int[] state) { 1172 int curState = state[0]; 1173 if (curState < 0) 1174 curState = state[1]; 1175 Occurence o = (fCountingStates != null) ? 1176 fCountingStates[curState] : null; 1177 int count = state[2]; 1178 1179 ArrayList ret = new ArrayList(); 1180 for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { 1181 int nextState = fTransTable[curState][elemIndex]; 1182 if (nextState != -1) { 1183 if (o != null) { 1184 if (curState == nextState) { 1185 // Do not include transitions which loop back to the 1186 // current state if we've looped the maximum number 1187 // of times or greater. 1188 if (count >= o.maxOccurs && 1189 o.maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED) { 1190 continue; 1191 } 1192 } 1193 // Do not include transitions which advance past the 1194 // current state if we have not looped enough times. 1195 else if (count < o.minOccurs) { 1196 continue; 1197 } 1198 } 1199 ret.add(fElemMap[elemIndex]); 1200 } 1201 } 1202 return ret; 1203 } 1204 1205 /** 1206 * Used by constant space algorithm for a{n,m} for n > 1 and 1207 * m <= unbounded. Called by a validator if validation of 1208 * countent model succeeds after subsuming a{n,m} to a* 1209 * (or a+) to check the n and m bounds. 1210 * Returns <code>null</code> if validation of bounds is 1211 * successful. Returns a list of strings with error info 1212 * if not. Even entries in list returned are error codes 1213 * (used to look up properties) and odd entries are parameters 1214 * to be passed when formatting error message. Each parameter 1215 * is associated with the error code that preceeds it in 1216 * the list. 1217 */ 1218 public ArrayList checkMinMaxBounds() { 1219 ArrayList result = null; 1220 for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { 1221 int count = fElemMapCounter[elemIndex]; 1222 if (count == -1) { 1223 continue; 1224 } 1225 final int minOccurs = fElemMapCounterLowerBound[elemIndex]; 1226 final int maxOccurs = fElemMapCounterUpperBound[elemIndex]; 1227 if (count < minOccurs) { 1228 if (result == null) result = new ArrayList(); 1229 result.add("cvc-complex-type.2.4.b"); 1230 result.add("{" + fElemMap[elemIndex] + "}"); 1231 } 1232 if (maxOccurs != -1 && count > maxOccurs) { 1233 if (result == null) result = new ArrayList(); 1234 result.add("cvc-complex-type.2.4.d.1"); 1235 result.add("{" + fElemMap[elemIndex] + "}"); 1236 } 1237 } 1238 return result; 1239 } 1240 1241 public int [] occurenceInfo(int[] state) { 1242 if (fCountingStates != null) { 1243 int curState = state[0]; 1244 if (curState < 0) { 1245 curState = state[1]; 1246 } 1247 Occurence o = fCountingStates[curState]; 1248 if (o != null) { 1249 int [] occurenceInfo = new int[4]; 1250 occurenceInfo[0] = o.minOccurs; 1251 occurenceInfo[1] = o.maxOccurs; 1252 occurenceInfo[2] = state[2]; 1253 occurenceInfo[3] = o.elemIndex; 1254 return occurenceInfo; 1255 } 1256 } 1257 return null; 1258 } 1259 1260 public String getTermName(int termId) { 1261 Object term = fElemMap[termId]; 1262 return (term != null) ? term.toString() : null; 1263 } 1264 1265 public boolean isCompactedForUPA() { 1266 return fIsCompactedForUPA; 1267 } 1268 } // class DFAContentModel