1 /* 2 * reserved comment block 3 * DO NOT REMOVE OR ALTER! 4 */ 5 /* 6 * Licensed to the Apache Software Foundation (ASF) under one or more 7 * contributor license agreements. See the NOTICE file distributed with 8 * this work for additional information regarding copyright ownership. 9 * The ASF licenses this file to You under the Apache License, Version 2.0 10 * (the "License"); you may not use this file except in compliance with 11 * the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, software 16 * distributed under the License is distributed on an "AS IS" BASIS, 17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 * See the License for the specific language governing permissions and 19 * limitations under the License. 20 */ 21 22 package com.sun.org.apache.xerces.internal.impl.dtd.models; 23 24 import java.util.HashMap; 25 26 import com.sun.org.apache.xerces.internal.impl.dtd.XMLContentSpec; 27 import com.sun.org.apache.xerces.internal.xni.QName; 28 29 /** 30 31 * @version $Id: DFAContentModel.java,v 1.4 2010/08/06 23:49:43 joehw Exp $ 32 * DFAContentModel is the derivative of ContentModel that does 33 * all of the non-trivial element content validation. This class does 34 * the conversion from the regular expression to the DFA that 35 * it then uses in its validation algorithm. 36 * <p> 37 * <b>Note:</b> Upstream work insures that this class will never see 38 * a content model with PCDATA in it. Any model with PCDATA is 'mixed' 39 * and is handled via the MixedContentModel class since mixed models 40 * are very constrained in form and easily handled via a special case. 41 * This also makes implementation of this class much easier. 42 * 43 * @xerces.internal 44 * 45 * @version $Id: DFAContentModel.java,v 1.4 2010/08/06 23:49:43 joehw Exp $ 46 */ 47 public class DFAContentModel 48 implements ContentModelValidator { 49 50 // 51 // Constants 52 // 53 // special strings 54 55 /** Epsilon string. */ 56 private static String fEpsilonString = "<<CMNODE_EPSILON>>"; 57 58 /** End-of-content string. */ 59 private static String fEOCString = "<<CMNODE_EOC>>"; 60 61 /** initializing static members **/ 62 static { 63 fEpsilonString = fEpsilonString.intern(); 64 fEOCString = fEOCString.intern(); 65 } 66 67 // debugging 68 69 /** Set to true to debug content model validation. */ 70 private static final boolean DEBUG_VALIDATE_CONTENT = false; 71 72 // 73 // Data 74 // 75 76 /* this is the EquivClassComparator object */ 77 //private EquivClassComparator comparator = null; 78 79 /** 80 * This is the map of unique input symbol elements to indices into 81 * each state's per-input symbol transition table entry. This is part 82 * of the built DFA information that must be kept around to do the 83 * actual validation. 84 */ 85 private QName fElemMap[] = null; 86 87 /** 88 * This is a map of whether the element map contains information 89 * related to ANY models. 90 */ 91 private int fElemMapType[] = null; 92 93 /** The element map size. */ 94 private int fElemMapSize = 0; 95 96 /** Boolean to distinguish Schema Mixed Content */ 97 private boolean fMixed; 98 99 /** 100 * The NFA position of the special EOC (end of content) node. This 101 * is saved away since it's used during the DFA build. 102 */ 103 private int fEOCPos = 0; 104 105 106 /** 107 * This is an array of booleans, one per state (there are 108 * fTransTableSize states in the DFA) that indicates whether that 109 * state is a final state. 110 */ 111 private boolean fFinalStateFlags[] = null; 112 113 /** 114 * The list of follow positions for each NFA position (i.e. for each 115 * non-epsilon leaf node.) This is only used during the building of 116 * the DFA, and is let go afterwards. 117 */ 118 private CMStateSet fFollowList[] = null; 119 120 /** 121 * This is the head node of our intermediate representation. It is 122 * only non-null during the building of the DFA (just so that it 123 * does not have to be passed all around.) Once the DFA is built, 124 * this is no longer required so its nulled out. 125 */ 126 private CMNode fHeadNode = null; 127 128 /** 129 * The count of leaf nodes. This is an important number that set some 130 * limits on the sizes of data structures in the DFA process. 131 */ 132 private int fLeafCount = 0; 133 134 /** 135 * An array of non-epsilon leaf nodes, which is used during the DFA 136 * build operation, then dropped. 137 */ 138 private CMLeaf fLeafList[] = null; 139 140 /** Array mapping ANY types to the leaf list. */ 141 private int fLeafListType[] = null; 142 143 //private ContentLeafNameTypeVector fLeafNameTypeVector = null; 144 145 /** 146 * The string pool of our parser session. This is set during construction 147 * and kept around. 148 */ 149 //private StringPool fStringPool = null; 150 151 /** 152 * This is the transition table that is the main by product of all 153 * of the effort here. It is an array of arrays of ints. The first 154 * dimension is the number of states we end up with in the DFA. The 155 * second dimensions is the number of unique elements in the content 156 * model (fElemMapSize). Each entry in the second dimension indicates 157 * the new state given that input for the first dimension's start 158 * state. 159 * <p> 160 * The fElemMap array handles mapping from element indexes to 161 * positions in the second dimension of the transition table. 162 */ 163 private int fTransTable[][] = null; 164 165 /** 166 * The number of valid entries in the transition table, and in the other 167 * related tables such as fFinalStateFlags. 168 */ 169 private int fTransTableSize = 0; 170 171 /** 172 * Flag that indicates that even though we have a "complicated" 173 * content model, it is valid to have no content. In other words, 174 * all parts of the content model are optional. For example: 175 * <pre> 176 * <!ELEMENT AllOptional (Optional*,NotRequired?)> 177 * </pre> 178 */ 179 private boolean fEmptyContentIsValid = false; 180 181 // temp variables 182 183 /** Temporary qualified name. */ 184 private final QName fQName = new QName(); 185 186 // 187 // Constructors 188 // 189 190 191 // 192 // Constructors 193 // 194 195 /** 196 * Constructs a DFA content model. 197 * 198 * @param syntaxTree The syntax tree of the content model. 199 * @param leafCount The number of leaves. 200 * @param mixed 201 * 202 */ 203 public DFAContentModel(CMNode syntaxTree, int leafCount, boolean mixed) { 204 // Store away our index and pools in members 205 //fStringPool = stringPool; 206 fLeafCount = leafCount; 207 208 209 // this is for Schema Mixed Content 210 fMixed = mixed; 211 212 // 213 // Ok, so lets grind through the building of the DFA. This method 214 // handles the high level logic of the algorithm, but it uses a 215 // number of helper classes to do its thing. 216 // 217 // In order to avoid having hundreds of references to the error and 218 // string handlers around, this guy and all of his helper classes 219 // just throw a simple exception and we then pass it along. 220 // 221 buildDFA(syntaxTree); 222 } 223 224 // 225 // ContentModelValidator methods 226 // 227 228 /** 229 * Check that the specified content is valid according to this 230 * content model. This method can also be called to do 'what if' 231 * testing of content models just to see if they would be valid. 232 * <p> 233 * A value of -1 in the children array indicates a PCDATA node. All other 234 * indexes will be positive and represent child elements. The count can be 235 * zero, since some elements have the EMPTY content model and that must be 236 * confirmed. 237 * 238 * @param children The children of this element. Each integer is an index within 239 * the <code>StringPool</code> of the child element name. An index 240 * of -1 is used to indicate an occurrence of non-whitespace character 241 * data. 242 * @param offset Offset into the array where the children starts. 243 * @param length The number of entries in the <code>children</code> array. 244 * 245 * @return The value -1 if fully valid, else the 0 based index of the child 246 * that first failed. If the value returned is equal to the number 247 * of children, then the specified children are valid but additional 248 * content is required to reach a valid ending state. 249 * 250 */ 251 public int validate(QName[] children, int offset, int length) { 252 253 if (DEBUG_VALIDATE_CONTENT) 254 System.out.println("DFAContentModel#validateContent"); 255 256 // 257 // A DFA content model must *always* have at least 1 child 258 // so a failure is given if no children present. 259 // 260 // Defect 782: This is an incorrect statement because a DFA 261 // content model is also used for constructions such as: 262 // 263 // (Optional*,NotRequired?) 264 // 265 // where a perfectly valid content would be NO CHILDREN. 266 // Therefore, if there are no children, we must check to 267 // see if the CMNODE_EOC marker is a valid start state! -Ac 268 // 269 if (length == 0) { 270 if (DEBUG_VALIDATE_CONTENT) { 271 System.out.println("!!! no children"); 272 System.out.println("elemMap="+fElemMap); 273 for (int i = 0; i < fElemMap.length; i++) { 274 String uri = fElemMap[i].uri; 275 String localpart = fElemMap[i].localpart; 276 277 System.out.println("fElemMap["+i+"]="+uri+","+ 278 localpart+" ("+ 279 uri+", "+ 280 localpart+ 281 ')'); 282 283 } 284 System.out.println("EOCIndex="+fEOCString); 285 } 286 287 return fEmptyContentIsValid ? -1 : 0; 288 289 } // if child count == 0 290 291 // 292 // Lets loop through the children in the array and move our way 293 // through the states. Note that we use the fElemMap array to map 294 // an element index to a state index. 295 // 296 int curState = 0; 297 for (int childIndex = 0; childIndex < length; childIndex++) 298 { 299 // Get the current element index out 300 final QName curElem = children[offset + childIndex]; 301 // ignore mixed text 302 if (fMixed && curElem.localpart == null) { 303 continue; 304 } 305 306 // Look up this child in our element map 307 int elemIndex = 0; 308 for (; elemIndex < fElemMapSize; elemIndex++) 309 { 310 int type = fElemMapType[elemIndex] & 0x0f ; 311 if (type == XMLContentSpec.CONTENTSPECNODE_LEAF) { 312 //System.out.println("fElemMap["+elemIndex+"]: "+fElemMap[elemIndex]); 313 if (fElemMap[elemIndex].rawname == curElem.rawname) { 314 break; 315 } 316 } 317 else if (type == XMLContentSpec.CONTENTSPECNODE_ANY) { 318 String uri = fElemMap[elemIndex].uri; 319 if (uri == null || uri == curElem.uri) { 320 break; 321 } 322 } 323 else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_LOCAL) { 324 if (curElem.uri == null) { 325 break; 326 } 327 } 328 else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER) { 329 if (fElemMap[elemIndex].uri != curElem.uri) { 330 break; 331 } 332 } 333 } 334 335 // If we didn't find it, then obviously not valid 336 if (elemIndex == fElemMapSize) { 337 if (DEBUG_VALIDATE_CONTENT) { 338 System.out.println("!!! didn't find it"); 339 340 System.out.println("curElem : " +curElem ); 341 for (int i=0; i<fElemMapSize; i++) { 342 System.out.println("fElemMap["+i+"] = " +fElemMap[i] ); 343 System.out.println("fElemMapType["+i+"] = " +fElemMapType[i] ); 344 } 345 } 346 347 return childIndex; 348 } 349 350 // 351 // Look up the next state for this input symbol when in the 352 // current state. 353 // 354 curState = fTransTable[curState][elemIndex]; 355 356 // If its not a legal transition, then invalid 357 if (curState == -1) { 358 if (DEBUG_VALIDATE_CONTENT) 359 System.out.println("!!! not a legal transition"); 360 return childIndex; 361 } 362 } 363 364 // 365 // We transitioned all the way through the input list. However, that 366 // does not mean that we ended in a final state. So check whether 367 // our ending state is a final state. 368 // 369 if (DEBUG_VALIDATE_CONTENT) 370 System.out.println("curState="+curState+", childCount="+length); 371 if (!fFinalStateFlags[curState]) 372 return length; 373 374 // success! 375 return -1; 376 } // validate 377 378 379 // 380 // Private methods 381 // 382 383 /** 384 * Builds the internal DFA transition table from the given syntax tree. 385 * 386 * @param syntaxTree The syntax tree. 387 * 388 * @exception CMException Thrown if DFA cannot be built. 389 */ 390 private void buildDFA(CMNode syntaxTree) 391 { 392 // 393 // The first step we need to take is to rewrite the content model 394 // using our CMNode objects, and in the process get rid of any 395 // repetition short cuts, converting them into '*' style repetitions 396 // or getting rid of repetitions altogether. 397 // 398 // The conversions done are: 399 // 400 // x+ -> (x|x*) 401 // x? -> (x|epsilon) 402 // 403 // This is a relatively complex scenario. What is happening is that 404 // we create a top level binary node of which the special EOC value 405 // is set as the right side node. The the left side is set to the 406 // rewritten syntax tree. The source is the original content model 407 // info from the decl pool. The rewrite is done by buildSyntaxTree() 408 // which recurses the decl pool's content of the element and builds 409 // a new tree in the process. 410 // 411 // Note that, during this operation, we set each non-epsilon leaf 412 // node's DFA state position and count the number of such leafs, which 413 // is left in the fLeafCount member. 414 // 415 // The nodeTmp object is passed in just as a temp node to use during 416 // the recursion. Otherwise, we'd have to create a new node on every 417 // level of recursion, which would be piggy in Java (as is everything 418 // for that matter.) 419 // 420 421 /* MODIFIED (Jan, 2001) 422 * 423 * Use following rules. 424 * nullable(x+) := nullable(x), first(x+) := first(x), last(x+) := last(x) 425 * nullable(x?) := true, first(x?) := first(x), last(x?) := last(x) 426 * 427 * The same computation of follow as x* is applied to x+ 428 * 429 * The modification drastically reduces computation time of 430 * "(a, (b, a+, (c, (b, a+)+, a+, (d, (c, (b, a+)+, a+)+, (b, a+)+, a+)+)+)+)+" 431 */ 432 433 fQName.setValues(null, fEOCString, fEOCString, null); 434 CMLeaf nodeEOC = new CMLeaf(fQName); 435 fHeadNode = new CMBinOp 436 ( 437 XMLContentSpec.CONTENTSPECNODE_SEQ 438 , syntaxTree 439 , nodeEOC 440 ); 441 442 // 443 // And handle specially the EOC node, which also must be numbered 444 // and counted as a non-epsilon leaf node. It could not be handled 445 // in the above tree build because it was created before all that 446 // started. We save the EOC position since its used during the DFA 447 // building loop. 448 // 449 fEOCPos = fLeafCount; 450 nodeEOC.setPosition(fLeafCount++); 451 452 // 453 // Ok, so now we have to iterate the new tree and do a little more 454 // work now that we know the leaf count. One thing we need to do is 455 // to calculate the first and last position sets of each node. This 456 // is cached away in each of the nodes. 457 // 458 // Along the way we also set the leaf count in each node as the 459 // maximum state count. They must know this in order to create their 460 // first/last pos sets. 461 // 462 // We also need to build an array of references to the non-epsilon 463 // leaf nodes. Since we iterate it in the same way as before, this 464 // will put them in the array according to their position values. 465 // 466 fLeafList = new CMLeaf[fLeafCount]; 467 fLeafListType = new int[fLeafCount]; 468 postTreeBuildInit(fHeadNode, 0); 469 470 // 471 // And, moving onward... We now need to build the follow position 472 // sets for all the nodes. So we allocate an array of state sets, 473 // one for each leaf node (i.e. each DFA position.) 474 // 475 fFollowList = new CMStateSet[fLeafCount]; 476 for (int index = 0; index < fLeafCount; index++) 477 fFollowList[index] = new CMStateSet(fLeafCount); 478 calcFollowList(fHeadNode); 479 // 480 // And finally the big push... Now we build the DFA using all the 481 // states and the tree we've built up. First we set up the various 482 // data structures we are going to use while we do this. 483 // 484 // First of all we need an array of unique element names in our 485 // content model. For each transition table entry, we need a set of 486 // contiguous indices to represent the transitions for a particular 487 // input element. So we need to a zero based range of indexes that 488 // map to element types. This element map provides that mapping. 489 // 490 fElemMap = new QName[fLeafCount]; 491 fElemMapType = new int[fLeafCount]; 492 fElemMapSize = 0; 493 for (int outIndex = 0; outIndex < fLeafCount; outIndex++) 494 { 495 fElemMap[outIndex] = new QName(); 496 497 /**** 498 if ( (fLeafListType[outIndex] & 0x0f) != 0 ) { 499 if (fLeafNameTypeVector == null) { 500 fLeafNameTypeVector = new ContentLeafNameTypeVector(); 501 } 502 } 503 /****/ 504 505 // Get the current leaf's element index 506 final QName element = fLeafList[outIndex].getElement(); 507 508 // See if the current leaf node's element index is in the list 509 int inIndex = 0; 510 for (; inIndex < fElemMapSize; inIndex++) 511 { 512 if (fElemMap[inIndex].rawname == element.rawname) { 513 break; 514 } 515 } 516 517 // If it was not in the list, then add it, if not the EOC node 518 if (inIndex == fElemMapSize) { 519 fElemMap[fElemMapSize].setValues(element); 520 fElemMapType[fElemMapSize] = fLeafListType[outIndex]; 521 fElemMapSize++; 522 } 523 } 524 // set up the fLeafNameTypeVector object if there is one. 525 /***** 526 if (fLeafNameTypeVector != null) { 527 fLeafNameTypeVector.setValues(fElemMap, fElemMapType, fElemMapSize); 528 } 529 530 /*** 531 * Optimization(Jan, 2001); We sort fLeafList according to 532 * elemIndex which is *uniquely* associated to each leaf. 533 * We are *assuming* that each element appears in at least one leaf. 534 **/ 535 536 int[] fLeafSorter = new int[fLeafCount + fElemMapSize]; 537 int fSortCount = 0; 538 539 for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { 540 for (int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) { 541 final QName leaf = fLeafList[leafIndex].getElement(); 542 final QName element = fElemMap[elemIndex]; 543 if (leaf.rawname == element.rawname) { 544 fLeafSorter[fSortCount++] = leafIndex; 545 } 546 } 547 fLeafSorter[fSortCount++] = -1; 548 } 549 550 /* Optimization(Jan, 2001) */ 551 552 // 553 // Next lets create some arrays, some that that hold transient 554 // information during the DFA build and some that are permament. 555 // These are kind of sticky since we cannot know how big they will 556 // get, but we don't want to use any Java collections because of 557 // performance. 558 // 559 // Basically they will probably be about fLeafCount*2 on average, 560 // but can be as large as 2^(fLeafCount*2), worst case. So we start 561 // with fLeafCount*4 as a middle ground. This will be very unlikely 562 // to ever have to expand, though it if does, the overhead will be 563 // somewhat ugly. 564 // 565 int curArraySize = fLeafCount * 4; 566 CMStateSet[] statesToDo = new CMStateSet[curArraySize]; 567 fFinalStateFlags = new boolean[curArraySize]; 568 fTransTable = new int[curArraySize][]; 569 570 // 571 // Ok we start with the initial set as the first pos set of the 572 // head node (which is the seq node that holds the content model 573 // and the EOC node.) 574 // 575 CMStateSet setT = fHeadNode.firstPos(); 576 577 // 578 // Init our two state flags. Basically the unmarked state counter 579 // is always chasing the current state counter. When it catches up, 580 // that means we made a pass through that did not add any new states 581 // to the lists, at which time we are done. We could have used a 582 // expanding array of flags which we used to mark off states as we 583 // complete them, but this is easier though less readable maybe. 584 // 585 int unmarkedState = 0; 586 int curState = 0; 587 588 // 589 // Init the first transition table entry, and put the initial state 590 // into the states to do list, then bump the current state. 591 // 592 fTransTable[curState] = makeDefStateList(); 593 statesToDo[curState] = setT; 594 curState++; 595 596 /* Optimization(Jan, 2001); This is faster for 597 * a large content model such as, "(t001+|t002+|.... |t500+)". 598 */ 599 600 HashMap stateTable = new HashMap(); 601 602 /* Optimization(Jan, 2001) */ 603 604 // 605 // Ok, almost done with the algorithm... We now enter the 606 // loop where we go until the states done counter catches up with 607 // the states to do counter. 608 // 609 while (unmarkedState < curState) 610 { 611 // 612 // Get the first unmarked state out of the list of states to do. 613 // And get the associated transition table entry. 614 // 615 setT = statesToDo[unmarkedState]; 616 int[] transEntry = fTransTable[unmarkedState]; 617 618 // Mark this one final if it contains the EOC state 619 fFinalStateFlags[unmarkedState] = setT.getBit(fEOCPos); 620 621 // Bump up the unmarked state count, marking this state done 622 unmarkedState++; 623 624 // Loop through each possible input symbol in the element map 625 CMStateSet newSet = null; 626 /* Optimization(Jan, 2001) */ 627 int sorterIndex = 0; 628 /* Optimization(Jan, 2001) */ 629 for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) 630 { 631 // 632 // Build up a set of states which is the union of all of 633 // the follow sets of DFA positions that are in the current 634 // state. If we gave away the new set last time through then 635 // create a new one. Otherwise, zero out the existing one. 636 // 637 if (newSet == null) 638 newSet = new CMStateSet(fLeafCount); 639 else 640 newSet.zeroBits(); 641 642 /* Optimization(Jan, 2001) */ 643 int leafIndex = fLeafSorter[sorterIndex++]; 644 645 while (leafIndex != -1) { 646 // If this leaf index (DFA position) is in the current set... 647 if (setT.getBit(leafIndex)) 648 { 649 // 650 // If this leaf is the current input symbol, then we 651 // want to add its follow list to the set of states to 652 // transition to from the current state. 653 // 654 newSet.union(fFollowList[leafIndex]); 655 } 656 657 leafIndex = fLeafSorter[sorterIndex++]; 658 } 659 /* Optimization(Jan, 2001) */ 660 661 // 662 // If this new set is not empty, then see if its in the list 663 // of states to do. If not, then add it. 664 // 665 if (!newSet.isEmpty()) 666 { 667 // 668 // Search the 'states to do' list to see if this new 669 // state set is already in there. 670 // 671 672 /* Optimization(Jan, 2001) */ 673 Integer stateObj = (Integer)stateTable.get(newSet); 674 int stateIndex = (stateObj == null ? curState : stateObj.intValue()); 675 /* Optimization(Jan, 2001) */ 676 677 // If we did not find it, then add it 678 if (stateIndex == curState) 679 { 680 // 681 // Put this new state into the states to do and init 682 // a new entry at the same index in the transition 683 // table. 684 // 685 statesToDo[curState] = newSet; 686 fTransTable[curState] = makeDefStateList(); 687 688 /* Optimization(Jan, 2001) */ 689 stateTable.put(newSet, new Integer(curState)); 690 /* Optimization(Jan, 2001) */ 691 692 // We now have a new state to do so bump the count 693 curState++; 694 695 // 696 // Null out the new set to indicate we adopted it. 697 // This will cause the creation of a new set on the 698 // next time around the loop. 699 // 700 newSet = null; 701 } 702 703 // 704 // Now set this state in the transition table's entry 705 // for this element (using its index), with the DFA 706 // state we will move to from the current state when we 707 // see this input element. 708 // 709 transEntry[elemIndex] = stateIndex; 710 711 // Expand the arrays if we're full 712 if (curState == curArraySize) 713 { 714 // 715 // Yikes, we overflowed the initial array size, so 716 // we've got to expand all of these arrays. So adjust 717 // up the size by 50% and allocate new arrays. 718 // 719 final int newSize = (int)(curArraySize * 1.5); 720 CMStateSet[] newToDo = new CMStateSet[newSize]; 721 boolean[] newFinalFlags = new boolean[newSize]; 722 int[][] newTransTable = new int[newSize][]; 723 724 // Copy over all of the existing content 725 System.arraycopy(statesToDo, 0, newToDo, 0, curArraySize); 726 System.arraycopy(fFinalStateFlags, 0, newFinalFlags, 0, curArraySize); 727 System.arraycopy(fTransTable, 0, newTransTable, 0, curArraySize); 728 729 // Store the new array size 730 curArraySize = newSize; 731 statesToDo = newToDo; 732 fFinalStateFlags = newFinalFlags; 733 fTransTable = newTransTable; 734 } 735 } 736 } 737 } 738 739 // Check to see if we can set the fEmptyContentIsValid flag. 740 fEmptyContentIsValid = ((CMBinOp)fHeadNode).getLeft().isNullable(); 741 742 // 743 // And now we can say bye bye to the temp representation since we've 744 // built the DFA. 745 // 746 if (DEBUG_VALIDATE_CONTENT) 747 dumpTree(fHeadNode, 0); 748 fHeadNode = null; 749 fLeafList = null; 750 fFollowList = null; 751 752 } 753 754 /** 755 * Calculates the follow list of the current node. 756 * 757 * @param nodeCur The curent node. 758 * 759 * @exception CMException Thrown if follow list cannot be calculated. 760 */ 761 private void calcFollowList(CMNode nodeCur) 762 { 763 // Recurse as required 764 if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_CHOICE) 765 { 766 // Recurse only 767 calcFollowList(((CMBinOp)nodeCur).getLeft()); 768 calcFollowList(((CMBinOp)nodeCur).getRight()); 769 } 770 else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_SEQ) 771 { 772 // Recurse first 773 calcFollowList(((CMBinOp)nodeCur).getLeft()); 774 calcFollowList(((CMBinOp)nodeCur).getRight()); 775 776 // 777 // Now handle our level. We use our left child's last pos 778 // set and our right child's first pos set, so go ahead and 779 // get them ahead of time. 780 // 781 final CMStateSet last = ((CMBinOp)nodeCur).getLeft().lastPos(); 782 final CMStateSet first = ((CMBinOp)nodeCur).getRight().firstPos(); 783 784 // 785 // Now, for every position which is in our left child's last set 786 // add all of the states in our right child's first set to the 787 // follow set for that position. 788 // 789 for (int index = 0; index < fLeafCount; index++) 790 { 791 if (last.getBit(index)) 792 fFollowList[index].union(first); 793 } 794 } 795 /*** 796 else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE) 797 { 798 // Recurse first 799 calcFollowList(((CMUniOp)nodeCur).getChild()); 800 801 // 802 // Now handle our level. We use our own first and last position 803 // sets, so get them up front. 804 // 805 final CMStateSet first = nodeCur.firstPos(); 806 final CMStateSet last = nodeCur.lastPos(); 807 808 // 809 // For every position which is in our last position set, add all 810 // of our first position states to the follow set for that 811 // position. 812 // 813 for (int index = 0; index < fLeafCount; index++) 814 { 815 if (last.getBit(index)) 816 fFollowList[index].union(first); 817 } 818 } 819 else if ((nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE) 820 || (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE)) 821 { 822 throw new RuntimeException("ImplementationMessages.VAL_NIICM"); 823 } 824 /***/ 825 else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE 826 || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE) 827 { 828 // Recurse first 829 calcFollowList(((CMUniOp)nodeCur).getChild()); 830 831 // 832 // Now handle our level. We use our own first and last position 833 // sets, so get them up front. 834 // 835 final CMStateSet first = nodeCur.firstPos(); 836 final CMStateSet last = nodeCur.lastPos(); 837 838 // 839 // For every position which is in our last position set, add all 840 // of our first position states to the follow set for that 841 // position. 842 // 843 for (int index = 0; index < fLeafCount; index++) 844 { 845 if (last.getBit(index)) 846 fFollowList[index].union(first); 847 } 848 } 849 850 else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE) { 851 // Recurse only 852 calcFollowList(((CMUniOp)nodeCur).getChild()); 853 } 854 /***/ 855 } 856 857 /** 858 * Dumps the tree of the current node to standard output. 859 * 860 * @param nodeCur The current node. 861 * @param level The maximum levels to output. 862 * 863 * @exception CMException Thrown on error. 864 */ 865 private void dumpTree(CMNode nodeCur, int level) 866 { 867 for (int index = 0; index < level; index++) 868 System.out.print(" "); 869 870 int type = nodeCur.type(); 871 if ((type == XMLContentSpec.CONTENTSPECNODE_CHOICE) 872 || (type == XMLContentSpec.CONTENTSPECNODE_SEQ)) 873 { 874 if (type == XMLContentSpec.CONTENTSPECNODE_CHOICE) 875 System.out.print("Choice Node "); 876 else 877 System.out.print("Seq Node "); 878 879 if (nodeCur.isNullable()) 880 System.out.print("Nullable "); 881 882 System.out.print("firstPos="); 883 System.out.print(nodeCur.firstPos().toString()); 884 System.out.print(" lastPos="); 885 System.out.println(nodeCur.lastPos().toString()); 886 887 dumpTree(((CMBinOp)nodeCur).getLeft(), level+1); 888 dumpTree(((CMBinOp)nodeCur).getRight(), level+1); 889 } 890 else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE) 891 { 892 System.out.print("Rep Node "); 893 894 if (nodeCur.isNullable()) 895 System.out.print("Nullable "); 896 897 System.out.print("firstPos="); 898 System.out.print(nodeCur.firstPos().toString()); 899 System.out.print(" lastPos="); 900 System.out.println(nodeCur.lastPos().toString()); 901 902 dumpTree(((CMUniOp)nodeCur).getChild(), level+1); 903 } 904 else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_LEAF) 905 { 906 System.out.print 907 ( 908 "Leaf: (pos=" 909 + ((CMLeaf)nodeCur).getPosition() 910 + "), " 911 + ((CMLeaf)nodeCur).getElement() 912 + "(elemIndex=" 913 + ((CMLeaf)nodeCur).getElement() 914 + ") " 915 ); 916 917 if (nodeCur.isNullable()) 918 System.out.print(" Nullable "); 919 920 System.out.print("firstPos="); 921 System.out.print(nodeCur.firstPos().toString()); 922 System.out.print(" lastPos="); 923 System.out.println(nodeCur.lastPos().toString()); 924 } 925 else 926 { 927 throw new RuntimeException("ImplementationMessages.VAL_NIICM"); 928 } 929 } 930 931 932 /** 933 * -1 is used to represent bad transitions in the transition table 934 * entry for each state. So each entry is initialized to an all -1 935 * array. This method creates a new entry and initializes it. 936 */ 937 private int[] makeDefStateList() 938 { 939 int[] retArray = new int[fElemMapSize]; 940 for (int index = 0; index < fElemMapSize; index++) 941 retArray[index] = -1; 942 return retArray; 943 } 944 945 /** Post tree build initialization. */ 946 private int postTreeBuildInit(CMNode nodeCur, int curIndex) 947 { 948 // Set the maximum states on this node 949 nodeCur.setMaxStates(fLeafCount); 950 951 // Recurse as required 952 if ((nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY || 953 (nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY_LOCAL || 954 (nodeCur.type() & 0x0f) == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER) { 955 // REVISIT: Don't waste these structures. 956 QName qname = new QName(null, null, null, ((CMAny)nodeCur).getURI()); 957 fLeafList[curIndex] = new CMLeaf(qname, ((CMAny)nodeCur).getPosition()); 958 fLeafListType[curIndex] = nodeCur.type(); 959 curIndex++; 960 } 961 else if ((nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_CHOICE) 962 || (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_SEQ)) 963 { 964 curIndex = postTreeBuildInit(((CMBinOp)nodeCur).getLeft(), curIndex); 965 curIndex = postTreeBuildInit(((CMBinOp)nodeCur).getRight(), curIndex); 966 } 967 else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE 968 || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE 969 || nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE) 970 { 971 curIndex = postTreeBuildInit(((CMUniOp)nodeCur).getChild(), curIndex); 972 } 973 else if (nodeCur.type() == XMLContentSpec.CONTENTSPECNODE_LEAF) 974 { 975 // 976 // Put this node in the leaf list at the current index if its 977 // a non-epsilon leaf. 978 // 979 final QName node = ((CMLeaf)nodeCur).getElement(); 980 if (node.localpart != fEpsilonString) { 981 fLeafList[curIndex] = (CMLeaf)nodeCur; 982 fLeafListType[curIndex] = XMLContentSpec.CONTENTSPECNODE_LEAF; 983 curIndex++; 984 } 985 } 986 else 987 { 988 throw new RuntimeException("ImplementationMessages.VAL_NIICM: type="+nodeCur.type()); 989 } 990 return curIndex; 991 } 992 993 } // class DFAContentModel