1 /*
   2  * Copyright (c) 2006, 2017, Oracle and/or its affiliates. All rights reserved.
   3  * @LastModified: Nov 2017
   4  */
   5 /*
   6  * Licensed to the Apache Software Foundation (ASF) under one or more
   7  * contributor license agreements.  See the NOTICE file distributed with
   8  * this work for additional information regarding copyright ownership.
   9  * The ASF licenses this file to You under the Apache License, Version 2.0
  10  * (the "License"); you may not use this file except in compliance with
  11  * the License.  You may obtain a copy of the License at
  12  *
  13  *      http://www.apache.org/licenses/LICENSE-2.0
  14  *
  15  * Unless required by applicable law or agreed to in writing, software
  16  * distributed under the License is distributed on an "AS IS" BASIS,
  17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  18  * See the License for the specific language governing permissions and
  19  * limitations under the License.
  20  */
  21 
  22 package com.sun.org.apache.xerces.internal.impl.xs.models;
  23 
  24 import com.sun.org.apache.xerces.internal.impl.dtd.models.CMNode;
  25 import com.sun.org.apache.xerces.internal.impl.xs.SchemaSymbols;
  26 import com.sun.org.apache.xerces.internal.impl.xs.XSComplexTypeDecl;
  27 import com.sun.org.apache.xerces.internal.impl.xs.XSDeclarationPool;
  28 import com.sun.org.apache.xerces.internal.impl.xs.XSElementDecl;
  29 import com.sun.org.apache.xerces.internal.impl.xs.XSModelGroupImpl;
  30 import com.sun.org.apache.xerces.internal.impl.xs.XSParticleDecl;
  31 
  32 /**
  33  * This class constructs content models for a given grammar.
  34  *
  35  * @xerces.internal
  36  *
  37  * @author Elena Litani, IBM
  38  * @author Sandy Gao, IBM
  39  *
  40  */
  41 public class CMBuilder {
  42 
  43     // REVISIT: should update the decl pool to cache XSCM objects too
  44     private XSDeclarationPool fDeclPool = null;
  45 
  46     // It never changes, so a static member is good enough
  47     private static XSEmptyCM fEmptyCM = new XSEmptyCM();
  48 
  49     // needed for DFA construction
  50     private int fLeafCount;
  51     // needed for UPA
  52     private int fParticleCount;
  53     //Factory to create Bin, Uni, Leaf nodes
  54     private CMNodeFactory fNodeFactory ;
  55 
  56     public CMBuilder(CMNodeFactory nodeFactory) {
  57         fDeclPool = null;
  58         fNodeFactory = nodeFactory ;
  59     }
  60 
  61     public void setDeclPool(XSDeclarationPool declPool) {
  62         fDeclPool = declPool;
  63     }
  64 
  65     /**
  66      * Get content model for the a given type
  67      *
  68      * @param typeDecl  get content model for which complex type
  69      * @param forUPA    a flag indicating whether it is for UPA
  70      * @return          a content model validator
  71      */
  72     public XSCMValidator getContentModel(XSComplexTypeDecl typeDecl, boolean forUPA) {
  73 
  74         // for complex type with empty or simple content,
  75         // there is no content model validator
  76         short contentType = typeDecl.getContentType();
  77         if (contentType == XSComplexTypeDecl.CONTENTTYPE_SIMPLE ||
  78             contentType == XSComplexTypeDecl.CONTENTTYPE_EMPTY) {
  79             return null;
  80         }
  81 
  82         XSParticleDecl particle = (XSParticleDecl)typeDecl.getParticle();
  83 
  84         // if the content is element only or mixed, but no particle
  85         // is defined, return the empty content model
  86         if (particle == null)
  87             return fEmptyCM;
  88 
  89         // if the content model contains "all" model group,
  90         // we create an "all" content model, otherwise a DFA content model
  91         XSCMValidator cmValidator = null;
  92         if (particle.fType == XSParticleDecl.PARTICLE_MODELGROUP &&
  93             ((XSModelGroupImpl)particle.fValue).fCompositor == XSModelGroupImpl.MODELGROUP_ALL) {
  94             cmValidator = createAllCM(particle);
  95         }
  96         else {
  97             cmValidator = createDFACM(particle, forUPA);
  98         }
  99 
 100         //now we are throught building content model and have passed sucessfully of the nodecount check
 101         //if set by the application
 102         fNodeFactory.resetNodeCount() ;
 103 
 104         // if the validator returned is null, it means there is nothing in
 105         // the content model, so we return the empty content model.
 106         if (cmValidator == null)
 107             cmValidator = fEmptyCM;
 108 
 109         return cmValidator;
 110     }
 111 
 112     XSCMValidator createAllCM(XSParticleDecl particle) {
 113         if (particle.fMaxOccurs == 0)
 114             return null;
 115 
 116         // get the model group, and add all children of it to the content model
 117         XSModelGroupImpl group = (XSModelGroupImpl)particle.fValue;
 118         // create an all content model. the parameter indicates whether
 119         // the <all> itself is optional
 120         XSAllCM allContent = new XSAllCM(particle.fMinOccurs == 0, group.fParticleCount);
 121         for (int i = 0; i < group.fParticleCount; i++) {
 122             // add the element decl to the all content model
 123             allContent.addElement((XSElementDecl)group.fParticles[i].fValue,
 124             group.fParticles[i].fMinOccurs == 0);
 125         }
 126         return allContent;
 127     }
 128 
 129     XSCMValidator createDFACM(XSParticleDecl particle, boolean forUPA) {
 130         fLeafCount = 0;
 131         fParticleCount = 0;
 132         // convert particle tree to CM tree
 133         CMNode node = useRepeatingLeafNodes(particle) ? buildCompactSyntaxTree(particle) : buildSyntaxTree(particle, forUPA, true);
 134         if (node == null)
 135             return null;
 136         // build DFA content model from the CM tree
 137         return new XSDFACM(node, fLeafCount);
 138     }
 139 
 140     // 1. convert particle tree to CM tree:
 141     // 2. expand all occurrence values: a{n, unbounded} -> a, a, ..., a+
 142     //                                  a{n, m} -> a, a, ..., a?, a?, ...
 143     // 3. convert model groups (a, b, c, ...) or (a | b | c | ...) to
 144     //    binary tree: (((a,b),c),...) or (((a|b)|c)|...)
 145     // 4. make sure each leaf node (XSCMLeaf) has a distinct position
 146     private CMNode buildSyntaxTree(XSParticleDecl particle, boolean forUPA, boolean optimize) {
 147 
 148         int maxOccurs = particle.fMaxOccurs;
 149         int minOccurs = particle.fMinOccurs;
 150 
 151         boolean compactedForUPA = false;
 152         if (forUPA) {
 153             // When doing UPA, we reduce the size of the minOccurs/maxOccurs values to make
 154             // processing the DFA faster.  For UPA the exact values don't matter.
 155             if (minOccurs > 1) {
 156                 if (maxOccurs > minOccurs || particle.getMaxOccursUnbounded()) {
 157                     minOccurs = 1;
 158                     compactedForUPA = true;
 159                 }
 160                 else { // maxOccurs == minOccurs
 161                     minOccurs = 2;
 162                     compactedForUPA = true;
 163                 }
 164             }
 165             if (maxOccurs > 1) {
 166                 maxOccurs = 2;
 167                 compactedForUPA = true;
 168             }
 169         }
 170 
 171         short type = particle.fType;
 172         CMNode nodeRet = null;
 173 
 174         if ((type == XSParticleDecl.PARTICLE_WILDCARD) ||
 175             (type == XSParticleDecl.PARTICLE_ELEMENT)) {
 176             // (task 1) element and wildcard particles should be converted to
 177             // leaf nodes
 178             // REVISIT: Make a clone of the leaf particle, so that if there
 179             // are two references to the same group, we have two different
 180             // leaf particles for the same element or wildcard decl.
 181             // This is useful for checking UPA.
 182             nodeRet = fNodeFactory.getCMLeafNode(particle.fType, particle.fValue, fParticleCount++, fLeafCount++);
 183             // (task 2) expand occurrence values
 184             nodeRet = expandContentModel(nodeRet, minOccurs, maxOccurs, optimize);
 185             if (nodeRet != null) {
 186                 nodeRet.setIsCompactUPAModel(compactedForUPA);
 187             }
 188         }
 189         else if (type == XSParticleDecl.PARTICLE_MODELGROUP) {
 190             // (task 1,3) convert model groups to binary trees
 191             XSModelGroupImpl group = (XSModelGroupImpl)particle.fValue;
 192             CMNode temp = null;
 193             // when the model group is a choice of more than one particles, but
 194             // only one of the particle is not empty, (for example
 195             // <choice>
 196             //   <sequence/>
 197             //   <element name="e"/>
 198             // </choice>
 199             // ) we can't not return that one particle ("e"). instead, we should
 200             // treat such particle as optional ("e?").
 201             // the following boolean variable is true when there are at least
 202             // 2 non-empty children.
 203             boolean twoChildren = false;
 204             for (int i = 0; i < group.fParticleCount; i++) {
 205                 // first convert each child to a CM tree
 206                 temp = buildSyntaxTree(group.fParticles[i],
 207                         forUPA,
 208                         optimize &&
 209                         minOccurs == 1 && maxOccurs == 1 &&
 210                         (group.fCompositor == XSModelGroupImpl.MODELGROUP_SEQUENCE ||
 211                          group.fParticleCount == 1));
 212                 // then combine them using binary operation
 213                 if (temp != null) {
 214                     compactedForUPA |= temp.isCompactedForUPA();
 215                     if (nodeRet == null) {
 216                         nodeRet = temp;
 217                     }
 218                     else {
 219                         nodeRet = fNodeFactory.getCMBinOpNode(group.fCompositor, nodeRet, temp);
 220                         // record the fact that there are at least 2 children
 221                         twoChildren = true;
 222                     }
 223                 }
 224             }
 225             // (task 2) expand occurrence values
 226             if (nodeRet != null) {
 227                 // when the group is "choice", there is only one non-empty
 228                 // child, and the group had more than one children, we need
 229                 // to create a zero-or-one (optional) node for the non-empty
 230                 // particle.
 231                 if (group.fCompositor == XSModelGroupImpl.MODELGROUP_CHOICE &&
 232                     !twoChildren && group.fParticleCount > 1) {
 233                     nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_ONE, nodeRet);
 234                 }
 235                 nodeRet = expandContentModel(nodeRet, minOccurs, maxOccurs, false);
 236                 nodeRet.setIsCompactUPAModel(compactedForUPA);
 237             }
 238         }
 239 
 240         return nodeRet;
 241     }
 242 
 243     // 2. expand all occurrence values: a{n, unbounded} -> a, a, ..., a+
 244     //                                  a{n, m} -> a, a, ..., a?, a?, ...
 245     // 4. make sure each leaf node (XSCMLeaf) has a distinct position
 246     private CMNode expandContentModel(CMNode node,
 247                                       int minOccurs, int maxOccurs, boolean optimize) {
 248 
 249         CMNode nodeRet = null;
 250 
 251         if (minOccurs==1 && maxOccurs==1) {
 252             nodeRet = node;
 253         }
 254         else if (minOccurs==0 && maxOccurs==1) {
 255             //zero or one
 256             nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_ONE, node);
 257         }
 258         else if (minOccurs == 0 && maxOccurs==SchemaSymbols.OCCURRENCE_UNBOUNDED) {
 259             //zero or more
 260             nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_MORE, node);
 261         }
 262         else if (minOccurs == 1 && maxOccurs==SchemaSymbols.OCCURRENCE_UNBOUNDED) {
 263             //one or more
 264             nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ONE_OR_MORE, node);
 265         }
 266         else if (optimize && node.type() == XSParticleDecl.PARTICLE_ELEMENT ||
 267                  node.type() == XSParticleDecl.PARTICLE_WILDCARD) {
 268             // Only for elements and wildcards, subsume e{n,m} and e{n,unbounded} to e*
 269             // or e+ and, once the DFA reaches a final state, check if the actual number
 270             // of elements is between minOccurs and maxOccurs. This new algorithm runs
 271             // in constant space.
 272 
 273             // TODO: What is the impact of this optimization on the PSVI?
 274             nodeRet = fNodeFactory.getCMUniOpNode(
 275                     minOccurs == 0 ? XSParticleDecl.PARTICLE_ZERO_OR_MORE
 276                         : XSParticleDecl.PARTICLE_ONE_OR_MORE, node);
 277             nodeRet.setUserData(new int[] { minOccurs, maxOccurs });
 278         }
 279         else if (maxOccurs == SchemaSymbols.OCCURRENCE_UNBOUNDED) {
 280             // => a,a,..,a+
 281             // create a+ node first, then put minOccurs-1 a's in front of it
 282             // for the first time "node" is used, we don't need to make a copy
 283             // and for other references to node, we make copies
 284             nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ONE_OR_MORE, node);
 285             // (task 4) we need to call copyNode here, so that we append
 286             // an entire new copy of the node (a subtree). this is to ensure
 287             // all leaf nodes have distinct position
 288             // we know that minOccurs > 1
 289             nodeRet = fNodeFactory.getCMBinOpNode(XSModelGroupImpl.MODELGROUP_SEQUENCE,
 290                                                   multiNodes(node, minOccurs-1, true), nodeRet);
 291         }
 292         else {
 293             // {n,m} => a,a,a,...(a),(a),...
 294             // first n a's, then m-n a?'s.
 295             // copyNode is called, for the same reason as above
 296             if (minOccurs > 0) {
 297                 nodeRet = multiNodes(node, minOccurs, false);
 298             }
 299             if (maxOccurs > minOccurs) {
 300                 node = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_ONE, node);
 301                 if (nodeRet == null) {
 302                     nodeRet = multiNodes(node, maxOccurs-minOccurs, false);
 303                 }
 304                 else {
 305                     nodeRet = fNodeFactory.getCMBinOpNode(XSModelGroupImpl.MODELGROUP_SEQUENCE,
 306                                                           nodeRet, multiNodes(node, maxOccurs-minOccurs, true));
 307                 }
 308             }
 309         }
 310 
 311         return nodeRet;
 312     }
 313 
 314     private CMNode multiNodes(CMNode node, int num, boolean copyFirst) {
 315         if (num == 0) {
 316             return null;
 317         }
 318         if (num == 1) {
 319             return copyFirst ? copyNode(node) : node;
 320         }
 321         int num1 = num/2;
 322         return fNodeFactory.getCMBinOpNode(XSModelGroupImpl.MODELGROUP_SEQUENCE,
 323                                            multiNodes(node, num1, copyFirst),
 324                                            multiNodes(node, num-num1, true));
 325     }
 326 
 327     // 4. make sure each leaf node (XSCMLeaf) has a distinct position
 328     private CMNode copyNode(CMNode node) {
 329         int type = node.type();
 330         // for choice or sequence, copy the two subtrees, and combine them
 331         if (type == XSModelGroupImpl.MODELGROUP_CHOICE ||
 332             type == XSModelGroupImpl.MODELGROUP_SEQUENCE) {
 333             XSCMBinOp bin = (XSCMBinOp)node;
 334             node = fNodeFactory.getCMBinOpNode(type, copyNode(bin.getLeft()),
 335                                  copyNode(bin.getRight()));
 336         }
 337         // for ?+*, copy the subtree, and put it in a new ?+* node
 338         else if (type == XSParticleDecl.PARTICLE_ZERO_OR_MORE ||
 339                  type == XSParticleDecl.PARTICLE_ONE_OR_MORE ||
 340                  type == XSParticleDecl.PARTICLE_ZERO_OR_ONE) {
 341             XSCMUniOp uni = (XSCMUniOp)node;
 342             node = fNodeFactory.getCMUniOpNode(type, copyNode(uni.getChild()));
 343         }
 344         // for element/wildcard (leaf), make a new leaf node,
 345         // with a distinct position
 346         else if (type == XSParticleDecl.PARTICLE_ELEMENT ||
 347                  type == XSParticleDecl.PARTICLE_WILDCARD) {
 348             XSCMLeaf leaf = (XSCMLeaf)node;
 349             node = fNodeFactory.getCMLeafNode(leaf.type(), leaf.getLeaf(), leaf.getParticleId(), fLeafCount++);
 350         }
 351 
 352         return node;
 353     }
 354 
 355     // A special version of buildSyntaxTree() which builds a compact syntax tree
 356     // containing compound leaf nodes which carry occurence information. This method
 357     // for building the syntax tree is chosen over buildSyntaxTree() when
 358     // useRepeatingLeafNodes() returns true.
 359     private CMNode buildCompactSyntaxTree(XSParticleDecl particle) {
 360         int maxOccurs = particle.fMaxOccurs;
 361         int minOccurs = particle.fMinOccurs;
 362         short type = particle.fType;
 363         CMNode nodeRet = null;
 364 
 365         if ((type == XSParticleDecl.PARTICLE_WILDCARD) ||
 366             (type == XSParticleDecl.PARTICLE_ELEMENT)) {
 367             return buildCompactSyntaxTree2(particle, minOccurs, maxOccurs);
 368         }
 369         else if (type == XSParticleDecl.PARTICLE_MODELGROUP) {
 370             XSModelGroupImpl group = (XSModelGroupImpl)particle.fValue;
 371             if (group.fParticleCount == 1 && (minOccurs != 1 || maxOccurs != 1)) {
 372                 return buildCompactSyntaxTree2(group.fParticles[0], minOccurs, maxOccurs);
 373             }
 374             else {
 375                 CMNode temp = null;
 376 
 377                 // when the model group is a choice of more than one particles, but
 378                 // only one of the particle is not empty, (for example
 379                 // <choice>
 380                 //   <sequence/>
 381                 //   <element name="e"/>
 382                 // </choice>
 383                 // ) we can't not return that one particle ("e"). instead, we should
 384                 // treat such particle as optional ("e?").
 385                 // the following int variable keeps track of the number of non-empty children
 386                 int count = 0;
 387                 for (int i = 0; i < group.fParticleCount; i++) {
 388                     // first convert each child to a CM tree
 389                     temp = buildCompactSyntaxTree(group.fParticles[i]);
 390                     // then combine them using binary operation
 391                     if (temp != null) {
 392                         ++count;
 393                         if (nodeRet == null) {
 394                             nodeRet = temp;
 395                         }
 396                         else {
 397                             nodeRet = fNodeFactory.getCMBinOpNode(group.fCompositor, nodeRet, temp);
 398                         }
 399                     }
 400                 }
 401                 if (nodeRet != null) {
 402                     // when the group is "choice" and the group has one or more empty children,
 403                     // we need to create a zero-or-one (optional) node for the non-empty particles.
 404                     if (group.fCompositor == XSModelGroupImpl.MODELGROUP_CHOICE && count < group.fParticleCount) {
 405                         nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_ONE, nodeRet);
 406                     }
 407                 }
 408             }
 409         }
 410         return nodeRet;
 411     }
 412 
 413     private CMNode buildCompactSyntaxTree2(XSParticleDecl particle, int minOccurs, int maxOccurs) {
 414         // Convert element and wildcard particles to leaf nodes. Wrap repeating particles in a CMUniOpNode.
 415         CMNode nodeRet = null;
 416         if (minOccurs == 1 && maxOccurs == 1) {
 417             nodeRet = fNodeFactory.getCMLeafNode(particle.fType, particle.fValue, fParticleCount++, fLeafCount++);
 418         }
 419         else if (minOccurs == 0 && maxOccurs == 1) {
 420             // zero or one
 421             nodeRet = fNodeFactory.getCMLeafNode(particle.fType, particle.fValue, fParticleCount++, fLeafCount++);
 422             nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_ONE, nodeRet);
 423         }
 424         else if (minOccurs == 0 && maxOccurs==SchemaSymbols.OCCURRENCE_UNBOUNDED) {
 425             // zero or more
 426             nodeRet = fNodeFactory.getCMLeafNode(particle.fType, particle.fValue, fParticleCount++, fLeafCount++);
 427             nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_MORE, nodeRet);
 428         }
 429         else if (minOccurs == 1 && maxOccurs==SchemaSymbols.OCCURRENCE_UNBOUNDED) {
 430             // one or more
 431             nodeRet = fNodeFactory.getCMLeafNode(particle.fType, particle.fValue, fParticleCount++, fLeafCount++);
 432             nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ONE_OR_MORE, nodeRet);
 433         }
 434         else {
 435             // {n,m}: Instead of expanding this out, create a compound leaf node which carries the
 436             // occurence information and wrap it in the appropriate CMUniOpNode.
 437             nodeRet = fNodeFactory.getCMRepeatingLeafNode(particle.fType, particle.fValue, minOccurs, maxOccurs, fParticleCount++, fLeafCount++);
 438             if (minOccurs == 0) {
 439                 nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_MORE, nodeRet);
 440             }
 441             else {
 442                 nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ONE_OR_MORE, nodeRet);
 443             }
 444         }
 445         return nodeRet;
 446     }
 447 
 448     // This method checks if this particle can be transformed into a compact syntax
 449     // tree containing compound leaf nodes which carry occurence information. Currently
 450     // it returns true if each model group has minOccurs/maxOccurs == 1 or
 451     // contains only one element/wildcard particle with minOccurs/maxOccurs == 1.
 452     private boolean useRepeatingLeafNodes(XSParticleDecl particle) {
 453         int maxOccurs = particle.fMaxOccurs;
 454         int minOccurs = particle.fMinOccurs;
 455         short type = particle.fType;
 456 
 457         if (type == XSParticleDecl.PARTICLE_MODELGROUP) {
 458             XSModelGroupImpl group = (XSModelGroupImpl) particle.fValue;
 459             if (minOccurs != 1 || maxOccurs != 1) {
 460                 if (group.fParticleCount == 1) {
 461                     XSParticleDecl particle2 = group.fParticles[0];
 462                     short type2 = particle2.fType;
 463                     return ((type2 == XSParticleDecl.PARTICLE_ELEMENT ||
 464                             type2 == XSParticleDecl.PARTICLE_WILDCARD) &&
 465                             particle2.fMinOccurs == 1 &&
 466                             particle2.fMaxOccurs == 1);
 467                 }
 468                 return (group.fParticleCount == 0);
 469             }
 470             for (int i = 0; i < group.fParticleCount; ++i) {
 471                 if (!useRepeatingLeafNodes(group.fParticles[i])) {
 472                     return false;
 473                 }
 474             }
 475         }
 476         return true;
 477     }
 478 }