1 /* 2 * Copyright (c) 2006, 2009, Oracle and/or its affiliates. All rights reserved. 3 */ 4 /* 5 * Licensed to the Apache Software Foundation (ASF) under one or more 6 * contributor license agreements. See the NOTICE file distributed with 7 * this work for additional information regarding copyright ownership. 8 * The ASF licenses this file to You under the Apache License, Version 2.0 9 * (the "License"); you may not use this file except in compliance with 10 * the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xerces.internal.impl.xs.models; 22 23 import com.sun.org.apache.xerces.internal.impl.dtd.models.CMNode; 24 import com.sun.org.apache.xerces.internal.impl.xs.SchemaSymbols; 25 import com.sun.org.apache.xerces.internal.impl.xs.XSComplexTypeDecl; 26 import com.sun.org.apache.xerces.internal.impl.xs.XSDeclarationPool; 27 import com.sun.org.apache.xerces.internal.impl.xs.XSElementDecl; 28 import com.sun.org.apache.xerces.internal.impl.xs.XSModelGroupImpl; 29 import com.sun.org.apache.xerces.internal.impl.xs.XSParticleDecl; 30 31 /** 32 * This class constructs content models for a given grammar. 33 * 34 * @xerces.internal 35 * 36 * @author Elena Litani, IBM 37 * @author Sandy Gao, IBM 38 * 39 */ 40 public class CMBuilder { 41 42 // REVISIT: should update the decl pool to cache XSCM objects too 43 private XSDeclarationPool fDeclPool = null; 44 45 // It never changes, so a static member is good enough 46 private static XSEmptyCM fEmptyCM = new XSEmptyCM(); 47 48 // needed for DFA construction 49 private int fLeafCount; 50 // needed for UPA 51 private int fParticleCount; 52 //Factory to create Bin, Uni, Leaf nodes 53 private CMNodeFactory fNodeFactory ; 54 55 public CMBuilder(CMNodeFactory nodeFactory) { 56 fDeclPool = null; 57 fNodeFactory = nodeFactory ; 58 } 59 60 public void setDeclPool(XSDeclarationPool declPool) { 61 fDeclPool = declPool; 62 } 63 64 /** 65 * Get content model for the a given type 66 * 67 * @param typeDecl get content model for which complex type 68 * @param forUPA a flag indicating whether it is for UPA 69 * @return a content model validator 70 */ 71 public XSCMValidator getContentModel(XSComplexTypeDecl typeDecl, boolean forUPA) { 72 73 // for complex type with empty or simple content, 74 // there is no content model validator 75 short contentType = typeDecl.getContentType(); 76 if (contentType == XSComplexTypeDecl.CONTENTTYPE_SIMPLE || 77 contentType == XSComplexTypeDecl.CONTENTTYPE_EMPTY) { 78 return null; 79 } 80 81 XSParticleDecl particle = (XSParticleDecl)typeDecl.getParticle(); 82 83 // if the content is element only or mixed, but no particle 84 // is defined, return the empty content model 85 if (particle == null) 86 return fEmptyCM; 87 88 // if the content model contains "all" model group, 89 // we create an "all" content model, otherwise a DFA content model 90 XSCMValidator cmValidator = null; 91 if (particle.fType == XSParticleDecl.PARTICLE_MODELGROUP && 92 ((XSModelGroupImpl)particle.fValue).fCompositor == XSModelGroupImpl.MODELGROUP_ALL) { 93 cmValidator = createAllCM(particle); 94 } 95 else { 96 cmValidator = createDFACM(particle, forUPA); 97 } 98 99 //now we are throught building content model and have passed sucessfully of the nodecount check 100 //if set by the application 101 fNodeFactory.resetNodeCount() ; 102 103 // if the validator returned is null, it means there is nothing in 104 // the content model, so we return the empty content model. 105 if (cmValidator == null) 106 cmValidator = fEmptyCM; 107 108 return cmValidator; 109 } 110 111 XSCMValidator createAllCM(XSParticleDecl particle) { 112 if (particle.fMaxOccurs == 0) 113 return null; 114 115 // get the model group, and add all children of it to the content model 116 XSModelGroupImpl group = (XSModelGroupImpl)particle.fValue; 117 // create an all content model. the parameter indicates whether 118 // the <all> itself is optional 119 XSAllCM allContent = new XSAllCM(particle.fMinOccurs == 0, group.fParticleCount); 120 for (int i = 0; i < group.fParticleCount; i++) { 121 // add the element decl to the all content model 122 allContent.addElement((XSElementDecl)group.fParticles[i].fValue, 123 group.fParticles[i].fMinOccurs == 0); 124 } 125 return allContent; 126 } 127 128 XSCMValidator createDFACM(XSParticleDecl particle, boolean forUPA) { 129 fLeafCount = 0; 130 fParticleCount = 0; 131 // convert particle tree to CM tree 132 CMNode node = useRepeatingLeafNodes(particle) ? buildCompactSyntaxTree(particle) : buildSyntaxTree(particle, forUPA, true); 133 if (node == null) 134 return null; 135 // build DFA content model from the CM tree 136 return new XSDFACM(node, fLeafCount); 137 } 138 139 // 1. convert particle tree to CM tree: 140 // 2. expand all occurrence values: a{n, unbounded} -> a, a, ..., a+ 141 // a{n, m} -> a, a, ..., a?, a?, ... 142 // 3. convert model groups (a, b, c, ...) or (a | b | c | ...) to 143 // binary tree: (((a,b),c),...) or (((a|b)|c)|...) 144 // 4. make sure each leaf node (XSCMLeaf) has a distinct position 145 private CMNode buildSyntaxTree(XSParticleDecl particle, boolean forUPA, boolean optimize) { 146 147 int maxOccurs = particle.fMaxOccurs; 148 int minOccurs = particle.fMinOccurs; 149 150 boolean compactedForUPA = false; 151 if (forUPA) { 152 // When doing UPA, we reduce the size of the minOccurs/maxOccurs values to make 153 // processing the DFA faster. For UPA the exact values don't matter. 154 if (minOccurs > 1) { 155 if (maxOccurs > minOccurs || particle.getMaxOccursUnbounded()) { 156 minOccurs = 1; 157 compactedForUPA = true; 158 } 159 else { // maxOccurs == minOccurs 160 minOccurs = 2; 161 compactedForUPA = true; 162 } 163 } 164 if (maxOccurs > 1) { 165 maxOccurs = 2; 166 compactedForUPA = true; 167 } 168 } 169 170 short type = particle.fType; 171 CMNode nodeRet = null; 172 173 if ((type == XSParticleDecl.PARTICLE_WILDCARD) || 174 (type == XSParticleDecl.PARTICLE_ELEMENT)) { 175 // (task 1) element and wildcard particles should be converted to 176 // leaf nodes 177 // REVISIT: Make a clone of the leaf particle, so that if there 178 // are two references to the same group, we have two different 179 // leaf particles for the same element or wildcard decl. 180 // This is useful for checking UPA. 181 nodeRet = fNodeFactory.getCMLeafNode(particle.fType, particle.fValue, fParticleCount++, fLeafCount++); 182 // (task 2) expand occurrence values 183 nodeRet = expandContentModel(nodeRet, minOccurs, maxOccurs, optimize); 184 if (nodeRet != null) { 185 nodeRet.setIsCompactUPAModel(compactedForUPA); 186 } 187 } 188 else if (type == XSParticleDecl.PARTICLE_MODELGROUP) { 189 // (task 1,3) convert model groups to binary trees 190 XSModelGroupImpl group = (XSModelGroupImpl)particle.fValue; 191 CMNode temp = null; 192 // when the model group is a choice of more than one particles, but 193 // only one of the particle is not empty, (for example 194 // <choice> 195 // <sequence/> 196 // <element name="e"/> 197 // </choice> 198 // ) we can't not return that one particle ("e"). instead, we should 199 // treat such particle as optional ("e?"). 200 // the following boolean variable is true when there are at least 201 // 2 non-empty children. 202 boolean twoChildren = false; 203 for (int i = 0; i < group.fParticleCount; i++) { 204 // first convert each child to a CM tree 205 temp = buildSyntaxTree(group.fParticles[i], 206 forUPA, 207 optimize && 208 minOccurs == 1 && maxOccurs == 1 && 209 (group.fCompositor == XSModelGroupImpl.MODELGROUP_SEQUENCE || 210 group.fParticleCount == 1)); 211 // then combine them using binary operation 212 if (temp != null) { 213 compactedForUPA |= temp.isCompactedForUPA(); 214 if (nodeRet == null) { 215 nodeRet = temp; 216 } 217 else { 218 nodeRet = fNodeFactory.getCMBinOpNode(group.fCompositor, nodeRet, temp); 219 // record the fact that there are at least 2 children 220 twoChildren = true; 221 } 222 } 223 } 224 // (task 2) expand occurrence values 225 if (nodeRet != null) { 226 // when the group is "choice", there is only one non-empty 227 // child, and the group had more than one children, we need 228 // to create a zero-or-one (optional) node for the non-empty 229 // particle. 230 if (group.fCompositor == XSModelGroupImpl.MODELGROUP_CHOICE && 231 !twoChildren && group.fParticleCount > 1) { 232 nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_ONE, nodeRet); 233 } 234 nodeRet = expandContentModel(nodeRet, minOccurs, maxOccurs, false); 235 nodeRet.setIsCompactUPAModel(compactedForUPA); 236 } 237 } 238 239 return nodeRet; 240 } 241 242 // 2. expand all occurrence values: a{n, unbounded} -> a, a, ..., a+ 243 // a{n, m} -> a, a, ..., a?, a?, ... 244 // 4. make sure each leaf node (XSCMLeaf) has a distinct position 245 private CMNode expandContentModel(CMNode node, 246 int minOccurs, int maxOccurs, boolean optimize) { 247 248 CMNode nodeRet = null; 249 250 if (minOccurs==1 && maxOccurs==1) { 251 nodeRet = node; 252 } 253 else if (minOccurs==0 && maxOccurs==1) { 254 //zero or one 255 nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_ONE, node); 256 } 257 else if (minOccurs == 0 && maxOccurs==SchemaSymbols.OCCURRENCE_UNBOUNDED) { 258 //zero or more 259 nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_MORE, node); 260 } 261 else if (minOccurs == 1 && maxOccurs==SchemaSymbols.OCCURRENCE_UNBOUNDED) { 262 //one or more 263 nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ONE_OR_MORE, node); 264 } 265 else if (optimize && node.type() == XSParticleDecl.PARTICLE_ELEMENT || 266 node.type() == XSParticleDecl.PARTICLE_WILDCARD) { 267 // Only for elements and wildcards, subsume e{n,m} and e{n,unbounded} to e* 268 // or e+ and, once the DFA reaches a final state, check if the actual number 269 // of elements is between minOccurs and maxOccurs. This new algorithm runs 270 // in constant space. 271 272 // TODO: What is the impact of this optimization on the PSVI? 273 nodeRet = fNodeFactory.getCMUniOpNode( 274 minOccurs == 0 ? XSParticleDecl.PARTICLE_ZERO_OR_MORE 275 : XSParticleDecl.PARTICLE_ONE_OR_MORE, node); 276 nodeRet.setUserData(new int[] { minOccurs, maxOccurs }); 277 } 278 else if (maxOccurs == SchemaSymbols.OCCURRENCE_UNBOUNDED) { 279 // => a,a,..,a+ 280 // create a+ node first, then put minOccurs-1 a's in front of it 281 // for the first time "node" is used, we don't need to make a copy 282 // and for other references to node, we make copies 283 nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ONE_OR_MORE, node); 284 // (task 4) we need to call copyNode here, so that we append 285 // an entire new copy of the node (a subtree). this is to ensure 286 // all leaf nodes have distinct position 287 // we know that minOccurs > 1 288 nodeRet = fNodeFactory.getCMBinOpNode(XSModelGroupImpl.MODELGROUP_SEQUENCE, 289 multiNodes(node, minOccurs-1, true), nodeRet); 290 } 291 else { 292 // {n,m} => a,a,a,...(a),(a),... 293 // first n a's, then m-n a?'s. 294 // copyNode is called, for the same reason as above 295 if (minOccurs > 0) { 296 nodeRet = multiNodes(node, minOccurs, false); 297 } 298 if (maxOccurs > minOccurs) { 299 node = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_ONE, node); 300 if (nodeRet == null) { 301 nodeRet = multiNodes(node, maxOccurs-minOccurs, false); 302 } 303 else { 304 nodeRet = fNodeFactory.getCMBinOpNode(XSModelGroupImpl.MODELGROUP_SEQUENCE, 305 nodeRet, multiNodes(node, maxOccurs-minOccurs, true)); 306 } 307 } 308 } 309 310 return nodeRet; 311 } 312 313 private CMNode multiNodes(CMNode node, int num, boolean copyFirst) { 314 if (num == 0) { 315 return null; 316 } 317 if (num == 1) { 318 return copyFirst ? copyNode(node) : node; 319 } 320 int num1 = num/2; 321 return fNodeFactory.getCMBinOpNode(XSModelGroupImpl.MODELGROUP_SEQUENCE, 322 multiNodes(node, num1, copyFirst), 323 multiNodes(node, num-num1, true)); 324 } 325 326 // 4. make sure each leaf node (XSCMLeaf) has a distinct position 327 private CMNode copyNode(CMNode node) { 328 int type = node.type(); 329 // for choice or sequence, copy the two subtrees, and combine them 330 if (type == XSModelGroupImpl.MODELGROUP_CHOICE || 331 type == XSModelGroupImpl.MODELGROUP_SEQUENCE) { 332 XSCMBinOp bin = (XSCMBinOp)node; 333 node = fNodeFactory.getCMBinOpNode(type, copyNode(bin.getLeft()), 334 copyNode(bin.getRight())); 335 } 336 // for ?+*, copy the subtree, and put it in a new ?+* node 337 else if (type == XSParticleDecl.PARTICLE_ZERO_OR_MORE || 338 type == XSParticleDecl.PARTICLE_ONE_OR_MORE || 339 type == XSParticleDecl.PARTICLE_ZERO_OR_ONE) { 340 XSCMUniOp uni = (XSCMUniOp)node; 341 node = fNodeFactory.getCMUniOpNode(type, copyNode(uni.getChild())); 342 } 343 // for element/wildcard (leaf), make a new leaf node, 344 // with a distinct position 345 else if (type == XSParticleDecl.PARTICLE_ELEMENT || 346 type == XSParticleDecl.PARTICLE_WILDCARD) { 347 XSCMLeaf leaf = (XSCMLeaf)node; 348 node = fNodeFactory.getCMLeafNode(leaf.type(), leaf.getLeaf(), leaf.getParticleId(), fLeafCount++); 349 } 350 351 return node; 352 } 353 354 // A special version of buildSyntaxTree() which builds a compact syntax tree 355 // containing compound leaf nodes which carry occurence information. This method 356 // for building the syntax tree is chosen over buildSyntaxTree() when 357 // useRepeatingLeafNodes() returns true. 358 private CMNode buildCompactSyntaxTree(XSParticleDecl particle) { 359 int maxOccurs = particle.fMaxOccurs; 360 int minOccurs = particle.fMinOccurs; 361 short type = particle.fType; 362 CMNode nodeRet = null; 363 364 if ((type == XSParticleDecl.PARTICLE_WILDCARD) || 365 (type == XSParticleDecl.PARTICLE_ELEMENT)) { 366 return buildCompactSyntaxTree2(particle, minOccurs, maxOccurs); 367 } 368 else if (type == XSParticleDecl.PARTICLE_MODELGROUP) { 369 XSModelGroupImpl group = (XSModelGroupImpl)particle.fValue; 370 if (group.fParticleCount == 1 && (minOccurs != 1 || maxOccurs != 1)) { 371 return buildCompactSyntaxTree2(group.fParticles[0], minOccurs, maxOccurs); 372 } 373 else { 374 CMNode temp = null; 375 376 // when the model group is a choice of more than one particles, but 377 // only one of the particle is not empty, (for example 378 // <choice> 379 // <sequence/> 380 // <element name="e"/> 381 // </choice> 382 // ) we can't not return that one particle ("e"). instead, we should 383 // treat such particle as optional ("e?"). 384 // the following int variable keeps track of the number of non-empty children 385 int count = 0; 386 for (int i = 0; i < group.fParticleCount; i++) { 387 // first convert each child to a CM tree 388 temp = buildCompactSyntaxTree(group.fParticles[i]); 389 // then combine them using binary operation 390 if (temp != null) { 391 ++count; 392 if (nodeRet == null) { 393 nodeRet = temp; 394 } 395 else { 396 nodeRet = fNodeFactory.getCMBinOpNode(group.fCompositor, nodeRet, temp); 397 } 398 } 399 } 400 if (nodeRet != null) { 401 // when the group is "choice" and the group has one or more empty children, 402 // we need to create a zero-or-one (optional) node for the non-empty particles. 403 if (group.fCompositor == XSModelGroupImpl.MODELGROUP_CHOICE && count < group.fParticleCount) { 404 nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_ONE, nodeRet); 405 } 406 } 407 } 408 } 409 return nodeRet; 410 } 411 412 private CMNode buildCompactSyntaxTree2(XSParticleDecl particle, int minOccurs, int maxOccurs) { 413 // Convert element and wildcard particles to leaf nodes. Wrap repeating particles in a CMUniOpNode. 414 CMNode nodeRet = null; 415 if (minOccurs == 1 && maxOccurs == 1) { 416 nodeRet = fNodeFactory.getCMLeafNode(particle.fType, particle.fValue, fParticleCount++, fLeafCount++); 417 } 418 else if (minOccurs == 0 && maxOccurs == 1) { 419 // zero or one 420 nodeRet = fNodeFactory.getCMLeafNode(particle.fType, particle.fValue, fParticleCount++, fLeafCount++); 421 nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_ONE, nodeRet); 422 } 423 else if (minOccurs == 0 && maxOccurs==SchemaSymbols.OCCURRENCE_UNBOUNDED) { 424 // zero or more 425 nodeRet = fNodeFactory.getCMLeafNode(particle.fType, particle.fValue, fParticleCount++, fLeafCount++); 426 nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_MORE, nodeRet); 427 } 428 else if (minOccurs == 1 && maxOccurs==SchemaSymbols.OCCURRENCE_UNBOUNDED) { 429 // one or more 430 nodeRet = fNodeFactory.getCMLeafNode(particle.fType, particle.fValue, fParticleCount++, fLeafCount++); 431 nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ONE_OR_MORE, nodeRet); 432 } 433 else { 434 // {n,m}: Instead of expanding this out, create a compound leaf node which carries the 435 // occurence information and wrap it in the appropriate CMUniOpNode. 436 nodeRet = fNodeFactory.getCMRepeatingLeafNode(particle.fType, particle.fValue, minOccurs, maxOccurs, fParticleCount++, fLeafCount++); 437 if (minOccurs == 0) { 438 nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_MORE, nodeRet); 439 } 440 else { 441 nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ONE_OR_MORE, nodeRet); 442 } 443 } 444 return nodeRet; 445 } 446 447 // This method checks if this particle can be transformed into a compact syntax 448 // tree containing compound leaf nodes which carry occurence information. Currently 449 // it returns true if each model group has minOccurs/maxOccurs == 1 or 450 // contains only one element/wildcard particle with minOccurs/maxOccurs == 1. 451 private boolean useRepeatingLeafNodes(XSParticleDecl particle) { 452 int maxOccurs = particle.fMaxOccurs; 453 int minOccurs = particle.fMinOccurs; 454 short type = particle.fType; 455 456 if (type == XSParticleDecl.PARTICLE_MODELGROUP) { 457 XSModelGroupImpl group = (XSModelGroupImpl) particle.fValue; 458 if (minOccurs != 1 || maxOccurs != 1) { 459 if (group.fParticleCount == 1) { 460 XSParticleDecl particle2 = (XSParticleDecl) group.fParticles[0]; 461 short type2 = particle2.fType; 462 return ((type2 == XSParticleDecl.PARTICLE_ELEMENT || 463 type2 == XSParticleDecl.PARTICLE_WILDCARD) && 464 particle2.fMinOccurs == 1 && 465 particle2.fMaxOccurs == 1); 466 } 467 return (group.fParticleCount == 0); 468 } 469 for (int i = 0; i < group.fParticleCount; ++i) { 470 if (!useRepeatingLeafNodes(group.fParticles[i])) { 471 return false; 472 } 473 } 474 } 475 return true; 476 } 477 }