1 /*
   2  * Copyright (c) 2006, 2009, Oracle and/or its affiliates. All rights reserved.
   3  */
   4 /*
   5  * Licensed to the Apache Software Foundation (ASF) under one or more
   6  * contributor license agreements.  See the NOTICE file distributed with
   7  * this work for additional information regarding copyright ownership.
   8  * The ASF licenses this file to You under the Apache License, Version 2.0
   9  * (the "License"); you may not use this file except in compliance with
  10  * the License.  You may obtain a copy of the License at
  11  *
  12  *      http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 
  21 package com.sun.org.apache.xerces.internal.impl.xs.models;
  22 
  23 import com.sun.org.apache.xerces.internal.impl.dtd.models.CMNode;
  24 import com.sun.org.apache.xerces.internal.impl.xs.SchemaSymbols;
  25 import com.sun.org.apache.xerces.internal.impl.xs.XSComplexTypeDecl;
  26 import com.sun.org.apache.xerces.internal.impl.xs.XSDeclarationPool;
  27 import com.sun.org.apache.xerces.internal.impl.xs.XSElementDecl;
  28 import com.sun.org.apache.xerces.internal.impl.xs.XSModelGroupImpl;
  29 import com.sun.org.apache.xerces.internal.impl.xs.XSParticleDecl;
  30 
  31 /**
  32  * This class constructs content models for a given grammar.
  33  *
  34  * @xerces.internal
  35  *
  36  * @author Elena Litani, IBM
  37  * @author Sandy Gao, IBM
  38  *
  39  */
  40 public class CMBuilder {
  41 
  42     // REVISIT: should update the decl pool to cache XSCM objects too
  43     private XSDeclarationPool fDeclPool = null;
  44 
  45     // It never changes, so a static member is good enough
  46     private static XSEmptyCM fEmptyCM = new XSEmptyCM();
  47 
  48     // needed for DFA construction
  49     private int fLeafCount;
  50     // needed for UPA
  51     private int fParticleCount;
  52     //Factory to create Bin, Uni, Leaf nodes
  53     private CMNodeFactory fNodeFactory ;
  54 
  55     public CMBuilder(CMNodeFactory nodeFactory) {
  56         fDeclPool = null;
  57         fNodeFactory = nodeFactory ;
  58     }
  59 
  60     public void setDeclPool(XSDeclarationPool declPool) {
  61         fDeclPool = declPool;
  62     }
  63 
  64     /**
  65      * Get content model for the a given type
  66      *
  67      * @param typeDecl  get content model for which complex type
  68      * @param forUPA    a flag indicating whether it is for UPA
  69      * @return          a content model validator
  70      */
  71     public XSCMValidator getContentModel(XSComplexTypeDecl typeDecl, boolean forUPA) {
  72 
  73         // for complex type with empty or simple content,
  74         // there is no content model validator
  75         short contentType = typeDecl.getContentType();
  76         if (contentType == XSComplexTypeDecl.CONTENTTYPE_SIMPLE ||
  77             contentType == XSComplexTypeDecl.CONTENTTYPE_EMPTY) {
  78             return null;
  79         }
  80 
  81         XSParticleDecl particle = (XSParticleDecl)typeDecl.getParticle();
  82 
  83         // if the content is element only or mixed, but no particle
  84         // is defined, return the empty content model
  85         if (particle == null)
  86             return fEmptyCM;
  87 
  88         // if the content model contains "all" model group,
  89         // we create an "all" content model, otherwise a DFA content model
  90         XSCMValidator cmValidator = null;
  91         if (particle.fType == XSParticleDecl.PARTICLE_MODELGROUP &&
  92             ((XSModelGroupImpl)particle.fValue).fCompositor == XSModelGroupImpl.MODELGROUP_ALL) {
  93             cmValidator = createAllCM(particle);
  94         }
  95         else {
  96             cmValidator = createDFACM(particle, forUPA);
  97         }
  98 
  99         //now we are throught building content model and have passed sucessfully of the nodecount check
 100         //if set by the application
 101         fNodeFactory.resetNodeCount() ;
 102 
 103         // if the validator returned is null, it means there is nothing in
 104         // the content model, so we return the empty content model.
 105         if (cmValidator == null)
 106             cmValidator = fEmptyCM;
 107 
 108         return cmValidator;
 109     }
 110 
 111     XSCMValidator createAllCM(XSParticleDecl particle) {
 112         if (particle.fMaxOccurs == 0)
 113             return null;
 114 
 115         // get the model group, and add all children of it to the content model
 116         XSModelGroupImpl group = (XSModelGroupImpl)particle.fValue;
 117         // create an all content model. the parameter indicates whether
 118         // the <all> itself is optional
 119         XSAllCM allContent = new XSAllCM(particle.fMinOccurs == 0, group.fParticleCount);
 120         for (int i = 0; i < group.fParticleCount; i++) {
 121             // add the element decl to the all content model
 122             allContent.addElement((XSElementDecl)group.fParticles[i].fValue,
 123             group.fParticles[i].fMinOccurs == 0);
 124         }
 125         return allContent;
 126     }
 127 
 128     XSCMValidator createDFACM(XSParticleDecl particle, boolean forUPA) {
 129         fLeafCount = 0;
 130         fParticleCount = 0;
 131         // convert particle tree to CM tree
 132         CMNode node = useRepeatingLeafNodes(particle) ? buildCompactSyntaxTree(particle) : buildSyntaxTree(particle, forUPA, true);
 133         if (node == null)
 134             return null;
 135         // build DFA content model from the CM tree
 136         return new XSDFACM(node, fLeafCount);
 137     }
 138 
 139     // 1. convert particle tree to CM tree:
 140     // 2. expand all occurrence values: a{n, unbounded} -> a, a, ..., a+
 141     //                                  a{n, m} -> a, a, ..., a?, a?, ...
 142     // 3. convert model groups (a, b, c, ...) or (a | b | c | ...) to
 143     //    binary tree: (((a,b),c),...) or (((a|b)|c)|...)
 144     // 4. make sure each leaf node (XSCMLeaf) has a distinct position
 145     private CMNode buildSyntaxTree(XSParticleDecl particle, boolean forUPA, boolean optimize) {
 146 
 147         int maxOccurs = particle.fMaxOccurs;
 148         int minOccurs = particle.fMinOccurs;
 149 
 150         boolean compactedForUPA = false;
 151         if (forUPA) {
 152             // When doing UPA, we reduce the size of the minOccurs/maxOccurs values to make
 153             // processing the DFA faster.  For UPA the exact values don't matter.
 154             if (minOccurs > 1) {
 155                 if (maxOccurs > minOccurs || particle.getMaxOccursUnbounded()) {
 156                     minOccurs = 1;
 157                     compactedForUPA = true;
 158                 }
 159                 else { // maxOccurs == minOccurs
 160                     minOccurs = 2;
 161                     compactedForUPA = true;
 162                 }
 163             }
 164             if (maxOccurs > 1) {
 165                 maxOccurs = 2;
 166                 compactedForUPA = true;
 167             }
 168         }
 169 
 170         short type = particle.fType;
 171         CMNode nodeRet = null;
 172 
 173         if ((type == XSParticleDecl.PARTICLE_WILDCARD) ||
 174             (type == XSParticleDecl.PARTICLE_ELEMENT)) {
 175             // (task 1) element and wildcard particles should be converted to
 176             // leaf nodes
 177             // REVISIT: Make a clone of the leaf particle, so that if there
 178             // are two references to the same group, we have two different
 179             // leaf particles for the same element or wildcard decl.
 180             // This is useful for checking UPA.
 181             nodeRet = fNodeFactory.getCMLeafNode(particle.fType, particle.fValue, fParticleCount++, fLeafCount++);
 182             // (task 2) expand occurrence values
 183             nodeRet = expandContentModel(nodeRet, minOccurs, maxOccurs, optimize);
 184             if (nodeRet != null) {
 185                 nodeRet.setIsCompactUPAModel(compactedForUPA);
 186             }
 187         }
 188         else if (type == XSParticleDecl.PARTICLE_MODELGROUP) {
 189             // (task 1,3) convert model groups to binary trees
 190             XSModelGroupImpl group = (XSModelGroupImpl)particle.fValue;
 191             CMNode temp = null;
 192             // when the model group is a choice of more than one particles, but
 193             // only one of the particle is not empty, (for example
 194             // <choice>
 195             //   <sequence/>
 196             //   <element name="e"/>
 197             // </choice>
 198             // ) we can't not return that one particle ("e"). instead, we should
 199             // treat such particle as optional ("e?").
 200             // the following boolean variable is true when there are at least
 201             // 2 non-empty children.
 202             boolean twoChildren = false;
 203             for (int i = 0; i < group.fParticleCount; i++) {
 204                 // first convert each child to a CM tree
 205                 temp = buildSyntaxTree(group.fParticles[i],
 206                         forUPA,
 207                         optimize &&
 208                         minOccurs == 1 && maxOccurs == 1 &&
 209                         (group.fCompositor == XSModelGroupImpl.MODELGROUP_SEQUENCE ||
 210                          group.fParticleCount == 1));
 211                 // then combine them using binary operation
 212                 if (temp != null) {
 213                     compactedForUPA |= temp.isCompactedForUPA();
 214                     if (nodeRet == null) {
 215                         nodeRet = temp;
 216                     }
 217                     else {
 218                         nodeRet = fNodeFactory.getCMBinOpNode(group.fCompositor, nodeRet, temp);
 219                         // record the fact that there are at least 2 children
 220                         twoChildren = true;
 221                     }
 222                 }
 223             }
 224             // (task 2) expand occurrence values
 225             if (nodeRet != null) {
 226                 // when the group is "choice", there is only one non-empty
 227                 // child, and the group had more than one children, we need
 228                 // to create a zero-or-one (optional) node for the non-empty
 229                 // particle.
 230                 if (group.fCompositor == XSModelGroupImpl.MODELGROUP_CHOICE &&
 231                     !twoChildren && group.fParticleCount > 1) {
 232                     nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_ONE, nodeRet);
 233                 }
 234                 nodeRet = expandContentModel(nodeRet, minOccurs, maxOccurs, false);
 235                 nodeRet.setIsCompactUPAModel(compactedForUPA);
 236             }
 237         }
 238 
 239         return nodeRet;
 240     }
 241 
 242     // 2. expand all occurrence values: a{n, unbounded} -> a, a, ..., a+
 243     //                                  a{n, m} -> a, a, ..., a?, a?, ...
 244     // 4. make sure each leaf node (XSCMLeaf) has a distinct position
 245     private CMNode expandContentModel(CMNode node,
 246                                       int minOccurs, int maxOccurs, boolean optimize) {
 247 
 248         CMNode nodeRet = null;
 249 
 250         if (minOccurs==1 && maxOccurs==1) {
 251             nodeRet = node;
 252         }
 253         else if (minOccurs==0 && maxOccurs==1) {
 254             //zero or one
 255             nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_ONE, node);
 256         }
 257         else if (minOccurs == 0 && maxOccurs==SchemaSymbols.OCCURRENCE_UNBOUNDED) {
 258             //zero or more
 259             nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_MORE, node);
 260         }
 261         else if (minOccurs == 1 && maxOccurs==SchemaSymbols.OCCURRENCE_UNBOUNDED) {
 262             //one or more
 263             nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ONE_OR_MORE, node);
 264         }
 265         else if (optimize && node.type() == XSParticleDecl.PARTICLE_ELEMENT ||
 266                  node.type() == XSParticleDecl.PARTICLE_WILDCARD) {
 267             // Only for elements and wildcards, subsume e{n,m} and e{n,unbounded} to e*
 268             // or e+ and, once the DFA reaches a final state, check if the actual number
 269             // of elements is between minOccurs and maxOccurs. This new algorithm runs
 270             // in constant space.
 271 
 272             // TODO: What is the impact of this optimization on the PSVI?
 273             nodeRet = fNodeFactory.getCMUniOpNode(
 274                     minOccurs == 0 ? XSParticleDecl.PARTICLE_ZERO_OR_MORE
 275                         : XSParticleDecl.PARTICLE_ONE_OR_MORE, node);
 276             nodeRet.setUserData(new int[] { minOccurs, maxOccurs });
 277         }
 278         else if (maxOccurs == SchemaSymbols.OCCURRENCE_UNBOUNDED) {
 279             // => a,a,..,a+
 280             // create a+ node first, then put minOccurs-1 a's in front of it
 281             // for the first time "node" is used, we don't need to make a copy
 282             // and for other references to node, we make copies
 283             nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ONE_OR_MORE, node);
 284             // (task 4) we need to call copyNode here, so that we append
 285             // an entire new copy of the node (a subtree). this is to ensure
 286             // all leaf nodes have distinct position
 287             // we know that minOccurs > 1
 288             nodeRet = fNodeFactory.getCMBinOpNode(XSModelGroupImpl.MODELGROUP_SEQUENCE,
 289                                                   multiNodes(node, minOccurs-1, true), nodeRet);
 290         }
 291         else {
 292             // {n,m} => a,a,a,...(a),(a),...
 293             // first n a's, then m-n a?'s.
 294             // copyNode is called, for the same reason as above
 295             if (minOccurs > 0) {
 296                 nodeRet = multiNodes(node, minOccurs, false);
 297             }
 298             if (maxOccurs > minOccurs) {
 299                 node = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_ONE, node);
 300                 if (nodeRet == null) {
 301                     nodeRet = multiNodes(node, maxOccurs-minOccurs, false);
 302                 }
 303                 else {
 304                     nodeRet = fNodeFactory.getCMBinOpNode(XSModelGroupImpl.MODELGROUP_SEQUENCE,
 305                                                           nodeRet, multiNodes(node, maxOccurs-minOccurs, true));
 306                 }
 307             }
 308         }
 309 
 310         return nodeRet;
 311     }
 312 
 313     private CMNode multiNodes(CMNode node, int num, boolean copyFirst) {
 314         if (num == 0) {
 315             return null;
 316         }
 317         if (num == 1) {
 318             return copyFirst ? copyNode(node) : node;
 319         }
 320         int num1 = num/2;
 321         return fNodeFactory.getCMBinOpNode(XSModelGroupImpl.MODELGROUP_SEQUENCE,
 322                                            multiNodes(node, num1, copyFirst),
 323                                            multiNodes(node, num-num1, true));
 324     }
 325 
 326     // 4. make sure each leaf node (XSCMLeaf) has a distinct position
 327     private CMNode copyNode(CMNode node) {
 328         int type = node.type();
 329         // for choice or sequence, copy the two subtrees, and combine them
 330         if (type == XSModelGroupImpl.MODELGROUP_CHOICE ||
 331             type == XSModelGroupImpl.MODELGROUP_SEQUENCE) {
 332             XSCMBinOp bin = (XSCMBinOp)node;
 333             node = fNodeFactory.getCMBinOpNode(type, copyNode(bin.getLeft()),
 334                                  copyNode(bin.getRight()));
 335         }
 336         // for ?+*, copy the subtree, and put it in a new ?+* node
 337         else if (type == XSParticleDecl.PARTICLE_ZERO_OR_MORE ||
 338                  type == XSParticleDecl.PARTICLE_ONE_OR_MORE ||
 339                  type == XSParticleDecl.PARTICLE_ZERO_OR_ONE) {
 340             XSCMUniOp uni = (XSCMUniOp)node;
 341             node = fNodeFactory.getCMUniOpNode(type, copyNode(uni.getChild()));
 342         }
 343         // for element/wildcard (leaf), make a new leaf node,
 344         // with a distinct position
 345         else if (type == XSParticleDecl.PARTICLE_ELEMENT ||
 346                  type == XSParticleDecl.PARTICLE_WILDCARD) {
 347             XSCMLeaf leaf = (XSCMLeaf)node;
 348             node = fNodeFactory.getCMLeafNode(leaf.type(), leaf.getLeaf(), leaf.getParticleId(), fLeafCount++);
 349         }
 350 
 351         return node;
 352     }
 353 
 354     // A special version of buildSyntaxTree() which builds a compact syntax tree
 355     // containing compound leaf nodes which carry occurence information. This method
 356     // for building the syntax tree is chosen over buildSyntaxTree() when
 357     // useRepeatingLeafNodes() returns true.
 358     private CMNode buildCompactSyntaxTree(XSParticleDecl particle) {
 359         int maxOccurs = particle.fMaxOccurs;
 360         int minOccurs = particle.fMinOccurs;
 361         short type = particle.fType;
 362         CMNode nodeRet = null;
 363 
 364         if ((type == XSParticleDecl.PARTICLE_WILDCARD) ||
 365             (type == XSParticleDecl.PARTICLE_ELEMENT)) {
 366             return buildCompactSyntaxTree2(particle, minOccurs, maxOccurs);
 367         }
 368         else if (type == XSParticleDecl.PARTICLE_MODELGROUP) {
 369             XSModelGroupImpl group = (XSModelGroupImpl)particle.fValue;
 370             if (group.fParticleCount == 1 && (minOccurs != 1 || maxOccurs != 1)) {
 371                 return buildCompactSyntaxTree2(group.fParticles[0], minOccurs, maxOccurs);
 372             }
 373             else {
 374                 CMNode temp = null;
 375 
 376                 // when the model group is a choice of more than one particles, but
 377                 // only one of the particle is not empty, (for example
 378                 // <choice>
 379                 //   <sequence/>
 380                 //   <element name="e"/>
 381                 // </choice>
 382                 // ) we can't not return that one particle ("e"). instead, we should
 383                 // treat such particle as optional ("e?").
 384                 // the following int variable keeps track of the number of non-empty children
 385                 int count = 0;
 386                 for (int i = 0; i < group.fParticleCount; i++) {
 387                     // first convert each child to a CM tree
 388                     temp = buildCompactSyntaxTree(group.fParticles[i]);
 389                     // then combine them using binary operation
 390                     if (temp != null) {
 391                         ++count;
 392                         if (nodeRet == null) {
 393                             nodeRet = temp;
 394                         }
 395                         else {
 396                             nodeRet = fNodeFactory.getCMBinOpNode(group.fCompositor, nodeRet, temp);
 397                         }
 398                     }
 399                 }
 400                 if (nodeRet != null) {
 401                     // when the group is "choice" and the group has one or more empty children,
 402                     // we need to create a zero-or-one (optional) node for the non-empty particles.
 403                     if (group.fCompositor == XSModelGroupImpl.MODELGROUP_CHOICE && count < group.fParticleCount) {
 404                         nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_ONE, nodeRet);
 405                     }
 406                 }
 407             }
 408         }
 409         return nodeRet;
 410     }
 411 
 412     private CMNode buildCompactSyntaxTree2(XSParticleDecl particle, int minOccurs, int maxOccurs) {
 413         // Convert element and wildcard particles to leaf nodes. Wrap repeating particles in a CMUniOpNode.
 414         CMNode nodeRet = null;
 415         if (minOccurs == 1 && maxOccurs == 1) {
 416             nodeRet = fNodeFactory.getCMLeafNode(particle.fType, particle.fValue, fParticleCount++, fLeafCount++);
 417         }
 418         else if (minOccurs == 0 && maxOccurs == 1) {
 419             // zero or one
 420             nodeRet = fNodeFactory.getCMLeafNode(particle.fType, particle.fValue, fParticleCount++, fLeafCount++);
 421             nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_ONE, nodeRet);
 422         }
 423         else if (minOccurs == 0 && maxOccurs==SchemaSymbols.OCCURRENCE_UNBOUNDED) {
 424             // zero or more
 425             nodeRet = fNodeFactory.getCMLeafNode(particle.fType, particle.fValue, fParticleCount++, fLeafCount++);
 426             nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_MORE, nodeRet);
 427         }
 428         else if (minOccurs == 1 && maxOccurs==SchemaSymbols.OCCURRENCE_UNBOUNDED) {
 429             // one or more
 430             nodeRet = fNodeFactory.getCMLeafNode(particle.fType, particle.fValue, fParticleCount++, fLeafCount++);
 431             nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ONE_OR_MORE, nodeRet);
 432         }
 433         else {
 434             // {n,m}: Instead of expanding this out, create a compound leaf node which carries the
 435             // occurence information and wrap it in the appropriate CMUniOpNode.
 436             nodeRet = fNodeFactory.getCMRepeatingLeafNode(particle.fType, particle.fValue, minOccurs, maxOccurs, fParticleCount++, fLeafCount++);
 437             if (minOccurs == 0) {
 438                 nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_MORE, nodeRet);
 439             }
 440             else {
 441                 nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ONE_OR_MORE, nodeRet);
 442             }
 443         }
 444         return nodeRet;
 445     }
 446 
 447     // This method checks if this particle can be transformed into a compact syntax
 448     // tree containing compound leaf nodes which carry occurence information. Currently
 449     // it returns true if each model group has minOccurs/maxOccurs == 1 or
 450     // contains only one element/wildcard particle with minOccurs/maxOccurs == 1.
 451     private boolean useRepeatingLeafNodes(XSParticleDecl particle) {
 452         int maxOccurs = particle.fMaxOccurs;
 453         int minOccurs = particle.fMinOccurs;
 454         short type = particle.fType;
 455 
 456         if (type == XSParticleDecl.PARTICLE_MODELGROUP) {
 457             XSModelGroupImpl group = (XSModelGroupImpl) particle.fValue;
 458             if (minOccurs != 1 || maxOccurs != 1) {
 459                 if (group.fParticleCount == 1) {
 460                     XSParticleDecl particle2 = (XSParticleDecl) group.fParticles[0];
 461                     short type2 = particle2.fType;
 462                     return ((type2 == XSParticleDecl.PARTICLE_ELEMENT ||
 463                             type2 == XSParticleDecl.PARTICLE_WILDCARD) &&
 464                             particle2.fMinOccurs == 1 &&
 465                             particle2.fMaxOccurs == 1);
 466                 }
 467                 return (group.fParticleCount == 0);
 468             }
 469             for (int i = 0; i < group.fParticleCount; ++i) {
 470                 if (!useRepeatingLeafNodes(group.fParticles[i])) {
 471                     return false;
 472                 }
 473             }
 474         }
 475         return true;
 476     }
 477 }