1 /* 2 * Copyright (c) 2006, 2017, Oracle and/or its affiliates. All rights reserved. 3 * @LastModified: Nov 2017 4 */ 5 /* 6 * Licensed to the Apache Software Foundation (ASF) under one or more 7 * contributor license agreements. See the NOTICE file distributed with 8 * this work for additional information regarding copyright ownership. 9 * The ASF licenses this file to You under the Apache License, Version 2.0 10 * (the "License"); you may not use this file except in compliance with 11 * the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, software 16 * distributed under the License is distributed on an "AS IS" BASIS, 17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 * See the License for the specific language governing permissions and 19 * limitations under the License. 20 */ 21 22 package com.sun.org.apache.xerces.internal.impl.xs.models; 23 24 import com.sun.org.apache.xerces.internal.impl.dtd.models.CMNode; 25 import com.sun.org.apache.xerces.internal.impl.xs.SchemaSymbols; 26 import com.sun.org.apache.xerces.internal.impl.xs.XSComplexTypeDecl; 27 import com.sun.org.apache.xerces.internal.impl.xs.XSDeclarationPool; 28 import com.sun.org.apache.xerces.internal.impl.xs.XSElementDecl; 29 import com.sun.org.apache.xerces.internal.impl.xs.XSModelGroupImpl; 30 import com.sun.org.apache.xerces.internal.impl.xs.XSParticleDecl; 31 32 /** 33 * This class constructs content models for a given grammar. 34 * 35 * @xerces.internal 36 * 37 * @author Elena Litani, IBM 38 * @author Sandy Gao, IBM 39 * 40 */ 41 public class CMBuilder { 42 43 // REVISIT: should update the decl pool to cache XSCM objects too 44 private XSDeclarationPool fDeclPool = null; 45 46 // It never changes, so a static member is good enough 47 private static XSEmptyCM fEmptyCM = new XSEmptyCM(); 48 49 // needed for DFA construction 50 private int fLeafCount; 51 // needed for UPA 52 private int fParticleCount; 53 //Factory to create Bin, Uni, Leaf nodes 54 private CMNodeFactory fNodeFactory ; 55 56 public CMBuilder(CMNodeFactory nodeFactory) { 57 fDeclPool = null; 58 fNodeFactory = nodeFactory ; 59 } 60 61 public void setDeclPool(XSDeclarationPool declPool) { 62 fDeclPool = declPool; 63 } 64 65 /** 66 * Get content model for the a given type 67 * 68 * @param typeDecl get content model for which complex type 69 * @param forUPA a flag indicating whether it is for UPA 70 * @return a content model validator 71 */ 72 public XSCMValidator getContentModel(XSComplexTypeDecl typeDecl, boolean forUPA) { 73 74 // for complex type with empty or simple content, 75 // there is no content model validator 76 short contentType = typeDecl.getContentType(); 77 if (contentType == XSComplexTypeDecl.CONTENTTYPE_SIMPLE || 78 contentType == XSComplexTypeDecl.CONTENTTYPE_EMPTY) { 79 return null; 80 } 81 82 XSParticleDecl particle = (XSParticleDecl)typeDecl.getParticle(); 83 84 // if the content is element only or mixed, but no particle 85 // is defined, return the empty content model 86 if (particle == null) 87 return fEmptyCM; 88 89 // if the content model contains "all" model group, 90 // we create an "all" content model, otherwise a DFA content model 91 XSCMValidator cmValidator = null; 92 if (particle.fType == XSParticleDecl.PARTICLE_MODELGROUP && 93 ((XSModelGroupImpl)particle.fValue).fCompositor == XSModelGroupImpl.MODELGROUP_ALL) { 94 cmValidator = createAllCM(particle); 95 } 96 else { 97 cmValidator = createDFACM(particle, forUPA); 98 } 99 100 //now we are throught building content model and have passed sucessfully of the nodecount check 101 //if set by the application 102 fNodeFactory.resetNodeCount() ; 103 104 // if the validator returned is null, it means there is nothing in 105 // the content model, so we return the empty content model. 106 if (cmValidator == null) 107 cmValidator = fEmptyCM; 108 109 return cmValidator; 110 } 111 112 XSCMValidator createAllCM(XSParticleDecl particle) { 113 if (particle.fMaxOccurs == 0) 114 return null; 115 116 // get the model group, and add all children of it to the content model 117 XSModelGroupImpl group = (XSModelGroupImpl)particle.fValue; 118 // create an all content model. the parameter indicates whether 119 // the <all> itself is optional 120 XSAllCM allContent = new XSAllCM(particle.fMinOccurs == 0, group.fParticleCount); 121 for (int i = 0; i < group.fParticleCount; i++) { 122 // add the element decl to the all content model 123 allContent.addElement((XSElementDecl)group.fParticles[i].fValue, 124 group.fParticles[i].fMinOccurs == 0); 125 } 126 return allContent; 127 } 128 129 XSCMValidator createDFACM(XSParticleDecl particle, boolean forUPA) { 130 fLeafCount = 0; 131 fParticleCount = 0; 132 // convert particle tree to CM tree 133 CMNode node = useRepeatingLeafNodes(particle) ? buildCompactSyntaxTree(particle) : buildSyntaxTree(particle, forUPA, true); 134 if (node == null) 135 return null; 136 // build DFA content model from the CM tree 137 return new XSDFACM(node, fLeafCount); 138 } 139 140 // 1. convert particle tree to CM tree: 141 // 2. expand all occurrence values: a{n, unbounded} -> a, a, ..., a+ 142 // a{n, m} -> a, a, ..., a?, a?, ... 143 // 3. convert model groups (a, b, c, ...) or (a | b | c | ...) to 144 // binary tree: (((a,b),c),...) or (((a|b)|c)|...) 145 // 4. make sure each leaf node (XSCMLeaf) has a distinct position 146 private CMNode buildSyntaxTree(XSParticleDecl particle, boolean forUPA, boolean optimize) { 147 148 int maxOccurs = particle.fMaxOccurs; 149 int minOccurs = particle.fMinOccurs; 150 151 boolean compactedForUPA = false; 152 if (forUPA) { 153 // When doing UPA, we reduce the size of the minOccurs/maxOccurs values to make 154 // processing the DFA faster. For UPA the exact values don't matter. 155 if (minOccurs > 1) { 156 if (maxOccurs > minOccurs || particle.getMaxOccursUnbounded()) { 157 minOccurs = 1; 158 compactedForUPA = true; 159 } 160 else { // maxOccurs == minOccurs 161 minOccurs = 2; 162 compactedForUPA = true; 163 } 164 } 165 if (maxOccurs > 1) { 166 maxOccurs = 2; 167 compactedForUPA = true; 168 } 169 } 170 171 short type = particle.fType; 172 CMNode nodeRet = null; 173 174 if ((type == XSParticleDecl.PARTICLE_WILDCARD) || 175 (type == XSParticleDecl.PARTICLE_ELEMENT)) { 176 // (task 1) element and wildcard particles should be converted to 177 // leaf nodes 178 // REVISIT: Make a clone of the leaf particle, so that if there 179 // are two references to the same group, we have two different 180 // leaf particles for the same element or wildcard decl. 181 // This is useful for checking UPA. 182 nodeRet = fNodeFactory.getCMLeafNode(particle.fType, particle.fValue, fParticleCount++, fLeafCount++); 183 // (task 2) expand occurrence values 184 nodeRet = expandContentModel(nodeRet, minOccurs, maxOccurs, optimize); 185 if (nodeRet != null) { 186 nodeRet.setIsCompactUPAModel(compactedForUPA); 187 } 188 } 189 else if (type == XSParticleDecl.PARTICLE_MODELGROUP) { 190 // (task 1,3) convert model groups to binary trees 191 XSModelGroupImpl group = (XSModelGroupImpl)particle.fValue; 192 CMNode temp = null; 193 // when the model group is a choice of more than one particles, but 194 // only one of the particle is not empty, (for example 195 // <choice> 196 // <sequence/> 197 // <element name="e"/> 198 // </choice> 199 // ) we can't not return that one particle ("e"). instead, we should 200 // treat such particle as optional ("e?"). 201 // the following boolean variable is true when there are at least 202 // 2 non-empty children. 203 boolean twoChildren = false; 204 for (int i = 0; i < group.fParticleCount; i++) { 205 // first convert each child to a CM tree 206 temp = buildSyntaxTree(group.fParticles[i], 207 forUPA, 208 optimize && 209 minOccurs == 1 && maxOccurs == 1 && 210 (group.fCompositor == XSModelGroupImpl.MODELGROUP_SEQUENCE || 211 group.fParticleCount == 1)); 212 // then combine them using binary operation 213 if (temp != null) { 214 compactedForUPA |= temp.isCompactedForUPA(); 215 if (nodeRet == null) { 216 nodeRet = temp; 217 } 218 else { 219 nodeRet = fNodeFactory.getCMBinOpNode(group.fCompositor, nodeRet, temp); 220 // record the fact that there are at least 2 children 221 twoChildren = true; 222 } 223 } 224 } 225 // (task 2) expand occurrence values 226 if (nodeRet != null) { 227 // when the group is "choice", there is only one non-empty 228 // child, and the group had more than one children, we need 229 // to create a zero-or-one (optional) node for the non-empty 230 // particle. 231 if (group.fCompositor == XSModelGroupImpl.MODELGROUP_CHOICE && 232 !twoChildren && group.fParticleCount > 1) { 233 nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_ONE, nodeRet); 234 } 235 nodeRet = expandContentModel(nodeRet, minOccurs, maxOccurs, false); 236 nodeRet.setIsCompactUPAModel(compactedForUPA); 237 } 238 } 239 240 return nodeRet; 241 } 242 243 // 2. expand all occurrence values: a{n, unbounded} -> a, a, ..., a+ 244 // a{n, m} -> a, a, ..., a?, a?, ... 245 // 4. make sure each leaf node (XSCMLeaf) has a distinct position 246 private CMNode expandContentModel(CMNode node, 247 int minOccurs, int maxOccurs, boolean optimize) { 248 249 CMNode nodeRet = null; 250 251 if (minOccurs==1 && maxOccurs==1) { 252 nodeRet = node; 253 } 254 else if (minOccurs==0 && maxOccurs==1) { 255 //zero or one 256 nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_ONE, node); 257 } 258 else if (minOccurs == 0 && maxOccurs==SchemaSymbols.OCCURRENCE_UNBOUNDED) { 259 //zero or more 260 nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_MORE, node); 261 } 262 else if (minOccurs == 1 && maxOccurs==SchemaSymbols.OCCURRENCE_UNBOUNDED) { 263 //one or more 264 nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ONE_OR_MORE, node); 265 } 266 else if (optimize && node.type() == XSParticleDecl.PARTICLE_ELEMENT || 267 node.type() == XSParticleDecl.PARTICLE_WILDCARD) { 268 // Only for elements and wildcards, subsume e{n,m} and e{n,unbounded} to e* 269 // or e+ and, once the DFA reaches a final state, check if the actual number 270 // of elements is between minOccurs and maxOccurs. This new algorithm runs 271 // in constant space. 272 273 // TODO: What is the impact of this optimization on the PSVI? 274 nodeRet = fNodeFactory.getCMUniOpNode( 275 minOccurs == 0 ? XSParticleDecl.PARTICLE_ZERO_OR_MORE 276 : XSParticleDecl.PARTICLE_ONE_OR_MORE, node); 277 nodeRet.setUserData(new int[] { minOccurs, maxOccurs }); 278 } 279 else if (maxOccurs == SchemaSymbols.OCCURRENCE_UNBOUNDED) { 280 // => a,a,..,a+ 281 // create a+ node first, then put minOccurs-1 a's in front of it 282 // for the first time "node" is used, we don't need to make a copy 283 // and for other references to node, we make copies 284 nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ONE_OR_MORE, node); 285 // (task 4) we need to call copyNode here, so that we append 286 // an entire new copy of the node (a subtree). this is to ensure 287 // all leaf nodes have distinct position 288 // we know that minOccurs > 1 289 nodeRet = fNodeFactory.getCMBinOpNode(XSModelGroupImpl.MODELGROUP_SEQUENCE, 290 multiNodes(node, minOccurs-1, true), nodeRet); 291 } 292 else { 293 // {n,m} => a,a,a,...(a),(a),... 294 // first n a's, then m-n a?'s. 295 // copyNode is called, for the same reason as above 296 if (minOccurs > 0) { 297 nodeRet = multiNodes(node, minOccurs, false); 298 } 299 if (maxOccurs > minOccurs) { 300 node = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_ONE, node); 301 if (nodeRet == null) { 302 nodeRet = multiNodes(node, maxOccurs-minOccurs, false); 303 } 304 else { 305 nodeRet = fNodeFactory.getCMBinOpNode(XSModelGroupImpl.MODELGROUP_SEQUENCE, 306 nodeRet, multiNodes(node, maxOccurs-minOccurs, true)); 307 } 308 } 309 } 310 311 return nodeRet; 312 } 313 314 private CMNode multiNodes(CMNode node, int num, boolean copyFirst) { 315 if (num == 0) { 316 return null; 317 } 318 if (num == 1) { 319 return copyFirst ? copyNode(node) : node; 320 } 321 int num1 = num/2; 322 return fNodeFactory.getCMBinOpNode(XSModelGroupImpl.MODELGROUP_SEQUENCE, 323 multiNodes(node, num1, copyFirst), 324 multiNodes(node, num-num1, true)); 325 } 326 327 // 4. make sure each leaf node (XSCMLeaf) has a distinct position 328 private CMNode copyNode(CMNode node) { 329 int type = node.type(); 330 // for choice or sequence, copy the two subtrees, and combine them 331 if (type == XSModelGroupImpl.MODELGROUP_CHOICE || 332 type == XSModelGroupImpl.MODELGROUP_SEQUENCE) { 333 XSCMBinOp bin = (XSCMBinOp)node; 334 node = fNodeFactory.getCMBinOpNode(type, copyNode(bin.getLeft()), 335 copyNode(bin.getRight())); 336 } 337 // for ?+*, copy the subtree, and put it in a new ?+* node 338 else if (type == XSParticleDecl.PARTICLE_ZERO_OR_MORE || 339 type == XSParticleDecl.PARTICLE_ONE_OR_MORE || 340 type == XSParticleDecl.PARTICLE_ZERO_OR_ONE) { 341 XSCMUniOp uni = (XSCMUniOp)node; 342 node = fNodeFactory.getCMUniOpNode(type, copyNode(uni.getChild())); 343 } 344 // for element/wildcard (leaf), make a new leaf node, 345 // with a distinct position 346 else if (type == XSParticleDecl.PARTICLE_ELEMENT || 347 type == XSParticleDecl.PARTICLE_WILDCARD) { 348 XSCMLeaf leaf = (XSCMLeaf)node; 349 node = fNodeFactory.getCMLeafNode(leaf.type(), leaf.getLeaf(), leaf.getParticleId(), fLeafCount++); 350 } 351 352 return node; 353 } 354 355 // A special version of buildSyntaxTree() which builds a compact syntax tree 356 // containing compound leaf nodes which carry occurence information. This method 357 // for building the syntax tree is chosen over buildSyntaxTree() when 358 // useRepeatingLeafNodes() returns true. 359 private CMNode buildCompactSyntaxTree(XSParticleDecl particle) { 360 int maxOccurs = particle.fMaxOccurs; 361 int minOccurs = particle.fMinOccurs; 362 short type = particle.fType; 363 CMNode nodeRet = null; 364 365 if ((type == XSParticleDecl.PARTICLE_WILDCARD) || 366 (type == XSParticleDecl.PARTICLE_ELEMENT)) { 367 return buildCompactSyntaxTree2(particle, minOccurs, maxOccurs); 368 } 369 else if (type == XSParticleDecl.PARTICLE_MODELGROUP) { 370 XSModelGroupImpl group = (XSModelGroupImpl)particle.fValue; 371 if (group.fParticleCount == 1 && (minOccurs != 1 || maxOccurs != 1)) { 372 return buildCompactSyntaxTree2(group.fParticles[0], minOccurs, maxOccurs); 373 } 374 else { 375 CMNode temp = null; 376 377 // when the model group is a choice of more than one particles, but 378 // only one of the particle is not empty, (for example 379 // <choice> 380 // <sequence/> 381 // <element name="e"/> 382 // </choice> 383 // ) we can't not return that one particle ("e"). instead, we should 384 // treat such particle as optional ("e?"). 385 // the following int variable keeps track of the number of non-empty children 386 int count = 0; 387 for (int i = 0; i < group.fParticleCount; i++) { 388 // first convert each child to a CM tree 389 temp = buildCompactSyntaxTree(group.fParticles[i]); 390 // then combine them using binary operation 391 if (temp != null) { 392 ++count; 393 if (nodeRet == null) { 394 nodeRet = temp; 395 } 396 else { 397 nodeRet = fNodeFactory.getCMBinOpNode(group.fCompositor, nodeRet, temp); 398 } 399 } 400 } 401 if (nodeRet != null) { 402 // when the group is "choice" and the group has one or more empty children, 403 // we need to create a zero-or-one (optional) node for the non-empty particles. 404 if (group.fCompositor == XSModelGroupImpl.MODELGROUP_CHOICE && count < group.fParticleCount) { 405 nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_ONE, nodeRet); 406 } 407 } 408 } 409 } 410 return nodeRet; 411 } 412 413 private CMNode buildCompactSyntaxTree2(XSParticleDecl particle, int minOccurs, int maxOccurs) { 414 // Convert element and wildcard particles to leaf nodes. Wrap repeating particles in a CMUniOpNode. 415 CMNode nodeRet = null; 416 if (minOccurs == 1 && maxOccurs == 1) { 417 nodeRet = fNodeFactory.getCMLeafNode(particle.fType, particle.fValue, fParticleCount++, fLeafCount++); 418 } 419 else if (minOccurs == 0 && maxOccurs == 1) { 420 // zero or one 421 nodeRet = fNodeFactory.getCMLeafNode(particle.fType, particle.fValue, fParticleCount++, fLeafCount++); 422 nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_ONE, nodeRet); 423 } 424 else if (minOccurs == 0 && maxOccurs==SchemaSymbols.OCCURRENCE_UNBOUNDED) { 425 // zero or more 426 nodeRet = fNodeFactory.getCMLeafNode(particle.fType, particle.fValue, fParticleCount++, fLeafCount++); 427 nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_MORE, nodeRet); 428 } 429 else if (minOccurs == 1 && maxOccurs==SchemaSymbols.OCCURRENCE_UNBOUNDED) { 430 // one or more 431 nodeRet = fNodeFactory.getCMLeafNode(particle.fType, particle.fValue, fParticleCount++, fLeafCount++); 432 nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ONE_OR_MORE, nodeRet); 433 } 434 else { 435 // {n,m}: Instead of expanding this out, create a compound leaf node which carries the 436 // occurence information and wrap it in the appropriate CMUniOpNode. 437 nodeRet = fNodeFactory.getCMRepeatingLeafNode(particle.fType, particle.fValue, minOccurs, maxOccurs, fParticleCount++, fLeafCount++); 438 if (minOccurs == 0) { 439 nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_MORE, nodeRet); 440 } 441 else { 442 nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ONE_OR_MORE, nodeRet); 443 } 444 } 445 return nodeRet; 446 } 447 448 // This method checks if this particle can be transformed into a compact syntax 449 // tree containing compound leaf nodes which carry occurence information. Currently 450 // it returns true if each model group has minOccurs/maxOccurs == 1 or 451 // contains only one element/wildcard particle with minOccurs/maxOccurs == 1. 452 private boolean useRepeatingLeafNodes(XSParticleDecl particle) { 453 int maxOccurs = particle.fMaxOccurs; 454 int minOccurs = particle.fMinOccurs; 455 short type = particle.fType; 456 457 if (type == XSParticleDecl.PARTICLE_MODELGROUP) { 458 XSModelGroupImpl group = (XSModelGroupImpl) particle.fValue; 459 if (minOccurs != 1 || maxOccurs != 1) { 460 if (group.fParticleCount == 1) { 461 XSParticleDecl particle2 = group.fParticles[0]; 462 short type2 = particle2.fType; 463 return ((type2 == XSParticleDecl.PARTICLE_ELEMENT || 464 type2 == XSParticleDecl.PARTICLE_WILDCARD) && 465 particle2.fMinOccurs == 1 && 466 particle2.fMaxOccurs == 1); 467 } 468 return (group.fParticleCount == 0); 469 } 470 for (int i = 0; i < group.fParticleCount; ++i) { 471 if (!useRepeatingLeafNodes(group.fParticles[i])) { 472 return false; 473 } 474 } 475 } 476 return true; 477 } 478 }