1 /* 2 * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. 3 * @LastModified: Oct 2017 4 */ 5 /* 6 * Licensed to the Apache Software Foundation (ASF) under one or more 7 * contributor license agreements. See the NOTICE file distributed with 8 * this work for additional information regarding copyright ownership. 9 * The ASF licenses this file to You under the Apache License, Version 2.0 10 * (the "License"); you may not use this file except in compliance with 11 * the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, software 16 * distributed under the License is distributed on an "AS IS" BASIS, 17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 * See the License for the specific language governing permissions and 19 * limitations under the License. 20 */ 21 22 package com.sun.org.apache.xpath.internal.compiler; 23 24 import com.sun.org.apache.xml.internal.utils.PrefixResolver; 25 import com.sun.org.apache.xpath.internal.res.XPATHErrorResources; 26 import java.util.List; 27 28 /** 29 * This class is in charge of lexical processing of the XPath 30 * expression into tokens. 31 */ 32 class Lexer 33 { 34 35 /** 36 * The target XPath. 37 */ 38 private Compiler m_compiler; 39 40 /** 41 * The prefix resolver to map prefixes to namespaces in the XPath. 42 */ 43 PrefixResolver m_namespaceContext; 44 45 /** 46 * The XPath processor object. 47 */ 48 XPathParser m_processor; 49 50 /** 51 * This value is added to each element name in the TARGETEXTRA 52 * that is a 'target' (right-most top-level element name). 53 */ 54 static final int TARGETEXTRA = 10000; 55 56 /** 57 * Ignore this, it is going away. 58 * This holds a map to the m_tokenQueue that tells where the top-level elements are. 59 * It is used for pattern matching so the m_tokenQueue can be walked backwards. 60 * Each element that is a 'target', (right-most top level element name) has 61 * TARGETEXTRA added to it. 62 * 63 */ 64 private int m_patternMap[] = new int[100]; 65 66 /** 67 * Ignore this, it is going away. 68 * The number of elements that m_patternMap maps; 69 */ 70 private int m_patternMapSize; 71 72 /** 73 * Create a Lexer object. 74 * 75 * @param compiler The owning compiler for this lexer. 76 * @param resolver The prefix resolver for mapping qualified name prefixes 77 * to namespace URIs. 78 * @param xpathProcessor The parser that is processing strings to opcodes. 79 */ 80 Lexer(Compiler compiler, PrefixResolver resolver, 81 XPathParser xpathProcessor) 82 { 83 84 m_compiler = compiler; 85 m_namespaceContext = resolver; 86 m_processor = xpathProcessor; 87 } 88 89 /** 90 * Walk through the expression and build a token queue, and a map of the top-level 91 * elements. 92 * @param pat XSLT Expression. 93 * 94 * @throws javax.xml.transform.TransformerException 95 */ 96 void tokenize(String pat) throws javax.xml.transform.TransformerException 97 { 98 tokenize(pat, null); 99 } 100 101 /** 102 * Walk through the expression and build a token queue, and a map of the top-level 103 * elements. 104 * @param pat XSLT Expression. 105 * @param targetStrings a list to hold Strings, may be null. 106 * 107 * @throws javax.xml.transform.TransformerException 108 */ 109 @SuppressWarnings("fallthrough") // on purpose at case '-', '(' and default 110 void tokenize(String pat, List<String> targetStrings) 111 throws javax.xml.transform.TransformerException 112 { 113 114 m_compiler.m_currentPattern = pat; 115 m_patternMapSize = 0; 116 117 // This needs to grow too. 118 m_compiler.m_opMap = new OpMapVector(OpMap.MAXTOKENQUEUESIZE * 5, OpMap.BLOCKTOKENQUEUESIZE * 5, OpMap.MAPINDEX_LENGTH); 119 120 int nChars = pat.length(); 121 int startSubstring = -1; 122 int posOfNSSep = -1; 123 boolean isStartOfPat = true; 124 boolean isAttrName = false; 125 boolean isNum = false; 126 127 // Nesting of '[' so we can know if the given element should be 128 // counted inside the m_patternMap. 129 int nesting = 0; 130 131 // char[] chars = pat.toCharArray(); 132 for (int i = 0; i < nChars; i++) 133 { 134 char c = pat.charAt(i); 135 136 switch (c) 137 { 138 case '\"' : 139 { 140 if (startSubstring != -1) 141 { 142 isNum = false; 143 isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName); 144 isAttrName = false; 145 146 if (-1 != posOfNSSep) 147 { 148 posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i); 149 } 150 else 151 { 152 addToTokenQueue(pat.substring(startSubstring, i)); 153 } 154 } 155 156 startSubstring = i; 157 158 for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\"'); i++); 159 160 if (c == '\"' && i < nChars) 161 { 162 addToTokenQueue(pat.substring(startSubstring, i + 1)); 163 164 startSubstring = -1; 165 } 166 else 167 { 168 m_processor.error(XPATHErrorResources.ER_EXPECTED_DOUBLE_QUOTE, 169 null); //"misquoted literal... expected double quote!"); 170 } 171 } 172 break; 173 case '\'' : 174 if (startSubstring != -1) 175 { 176 isNum = false; 177 isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName); 178 isAttrName = false; 179 180 if (-1 != posOfNSSep) 181 { 182 posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i); 183 } 184 else 185 { 186 addToTokenQueue(pat.substring(startSubstring, i)); 187 } 188 } 189 190 startSubstring = i; 191 192 for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\''); i++); 193 194 if (c == '\'' && i < nChars) 195 { 196 addToTokenQueue(pat.substring(startSubstring, i + 1)); 197 198 startSubstring = -1; 199 } 200 else 201 { 202 m_processor.error(XPATHErrorResources.ER_EXPECTED_SINGLE_QUOTE, 203 null); //"misquoted literal... expected single quote!"); 204 } 205 break; 206 case 0x0A : 207 case 0x0D : 208 case ' ' : 209 case '\t' : 210 if (startSubstring != -1) 211 { 212 isNum = false; 213 isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName); 214 isAttrName = false; 215 216 if (-1 != posOfNSSep) 217 { 218 posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i); 219 } 220 else 221 { 222 addToTokenQueue(pat.substring(startSubstring, i)); 223 } 224 225 startSubstring = -1; 226 } 227 break; 228 case '@' : 229 isAttrName = true; 230 231 // fall-through on purpose 232 case '-' : 233 if ('-' == c) 234 { 235 if (!(isNum || (startSubstring == -1))) 236 { 237 break; 238 } 239 240 isNum = false; 241 } 242 243 // fall-through on purpose 244 case '(' : 245 case '[' : 246 case ')' : 247 case ']' : 248 case '|' : 249 case '/' : 250 case '*' : 251 case '+' : 252 case '=' : 253 case ',' : 254 case '\\' : // Unused at the moment 255 case '^' : // Unused at the moment 256 case '!' : // Unused at the moment 257 case '$' : 258 case '<' : 259 case '>' : 260 if (startSubstring != -1) 261 { 262 isNum = false; 263 isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName); 264 isAttrName = false; 265 266 if (-1 != posOfNSSep) 267 { 268 posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i); 269 } 270 else 271 { 272 addToTokenQueue(pat.substring(startSubstring, i)); 273 } 274 275 startSubstring = -1; 276 } 277 else if (('/' == c) && isStartOfPat) 278 { 279 isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName); 280 } 281 else if ('*' == c) 282 { 283 isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName); 284 isAttrName = false; 285 } 286 287 if (0 == nesting) 288 { 289 if ('|' == c) 290 { 291 if (null != targetStrings) 292 { 293 recordTokenString(targetStrings); 294 } 295 296 isStartOfPat = true; 297 } 298 } 299 300 if ((')' == c) || (']' == c)) 301 { 302 nesting--; 303 } 304 else if (('(' == c) || ('[' == c)) 305 { 306 nesting++; 307 } 308 309 addToTokenQueue(pat.substring(i, i + 1)); 310 break; 311 case ':' : 312 if (i>0) 313 { 314 if (posOfNSSep == (i - 1)) 315 { 316 if (startSubstring != -1) 317 { 318 if (startSubstring < (i - 1)) 319 addToTokenQueue(pat.substring(startSubstring, i - 1)); 320 } 321 322 isNum = false; 323 isAttrName = false; 324 startSubstring = -1; 325 posOfNSSep = -1; 326 327 addToTokenQueue(pat.substring(i - 1, i + 1)); 328 329 break; 330 } 331 else 332 { 333 posOfNSSep = i; 334 } 335 } 336 337 // fall through on purpose 338 default : 339 if (-1 == startSubstring) 340 { 341 startSubstring = i; 342 isNum = Character.isDigit(c); 343 } 344 else if (isNum) 345 { 346 isNum = Character.isDigit(c); 347 } 348 } 349 } 350 351 if (startSubstring != -1) 352 { 353 isNum = false; 354 isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName); 355 356 if ((-1 != posOfNSSep) || 357 ((m_namespaceContext != null) && (m_namespaceContext.handlesNullPrefixes()))) 358 { 359 posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, nChars); 360 } 361 else 362 { 363 addToTokenQueue(pat.substring(startSubstring, nChars)); 364 } 365 } 366 367 if (0 == m_compiler.getTokenQueueSize()) 368 { 369 m_processor.error(XPATHErrorResources.ER_EMPTY_EXPRESSION, null); //"Empty expression!"); 370 } 371 else if (null != targetStrings) 372 { 373 recordTokenString(targetStrings); 374 } 375 376 m_processor.m_queueMark = 0; 377 } 378 379 /** 380 * Record the current position on the token queue as long as 381 * this is a top-level element. Must be called before the 382 * next token is added to the m_tokenQueue. 383 * 384 * @param nesting The nesting count for the pattern element. 385 * @param isStart true if this is the start of a pattern. 386 * @param isAttrName true if we have determined that this is an attribute name. 387 * 388 * @return true if this is the start of a pattern. 389 */ 390 private boolean mapPatternElemPos(int nesting, boolean isStart, 391 boolean isAttrName) 392 { 393 394 if (0 == nesting) 395 { 396 if(m_patternMapSize >= m_patternMap.length) 397 { 398 int patternMap[] = m_patternMap; 399 int len = m_patternMap.length; 400 m_patternMap = new int[m_patternMapSize + 100]; 401 System.arraycopy(patternMap, 0, m_patternMap, 0, len); 402 } 403 if (!isStart) 404 { 405 m_patternMap[m_patternMapSize - 1] -= TARGETEXTRA; 406 } 407 m_patternMap[m_patternMapSize] = 408 (m_compiler.getTokenQueueSize() - (isAttrName ? 1 : 0)) + TARGETEXTRA; 409 410 m_patternMapSize++; 411 412 isStart = false; 413 } 414 415 return isStart; 416 } 417 418 /** 419 * Given a map pos, return the corresponding token queue pos. 420 * 421 * @param i The index in the m_patternMap. 422 * 423 * @return the token queue position. 424 */ 425 private int getTokenQueuePosFromMap(int i) 426 { 427 428 int pos = m_patternMap[i]; 429 430 return (pos >= TARGETEXTRA) ? (pos - TARGETEXTRA) : pos; 431 } 432 433 /** 434 * Reset token queue mark and m_token to a 435 * given position. 436 * @param mark The new position. 437 */ 438 private final void resetTokenMark(int mark) 439 { 440 441 int qsz = m_compiler.getTokenQueueSize(); 442 443 m_processor.m_queueMark = (mark > 0) 444 ? ((mark <= qsz) ? mark - 1 : mark) : 0; 445 446 if (m_processor.m_queueMark < qsz) 447 { 448 m_processor.m_token = 449 (String) m_compiler.getTokenQueue().elementAt(m_processor.m_queueMark++); 450 m_processor.m_tokenChar = m_processor.m_token.charAt(0); 451 } 452 else 453 { 454 m_processor.m_token = null; 455 m_processor.m_tokenChar = 0; 456 } 457 } 458 459 /** 460 * Given a string, return the corresponding keyword token. 461 * 462 * @param key The keyword. 463 * 464 * @return An opcode value. 465 */ 466 final int getKeywordToken(String key) 467 { 468 469 int tok; 470 471 try 472 { 473 Integer itok = (Integer) Keywords.getKeyWord(key); 474 475 tok = (null != itok) ? itok.intValue() : 0; 476 } 477 catch (NullPointerException npe) 478 { 479 tok = 0; 480 } 481 catch (ClassCastException cce) 482 { 483 tok = 0; 484 } 485 486 return tok; 487 } 488 489 /** 490 * Record the current token in the passed vector. 491 * 492 * @param targetStrings a list of strings. 493 */ 494 private void recordTokenString(List<String> targetStrings) 495 { 496 497 int tokPos = getTokenQueuePosFromMap(m_patternMapSize - 1); 498 499 resetTokenMark(tokPos + 1); 500 501 if (m_processor.lookahead('(', 1)) 502 { 503 int tok = getKeywordToken(m_processor.m_token); 504 505 switch (tok) 506 { 507 case OpCodes.NODETYPE_COMMENT : 508 targetStrings.add(PsuedoNames.PSEUDONAME_COMMENT); 509 break; 510 case OpCodes.NODETYPE_TEXT : 511 targetStrings.add(PsuedoNames.PSEUDONAME_TEXT); 512 break; 513 case OpCodes.NODETYPE_NODE : 514 targetStrings.add(PsuedoNames.PSEUDONAME_ANY); 515 break; 516 case OpCodes.NODETYPE_ROOT : 517 targetStrings.add(PsuedoNames.PSEUDONAME_ROOT); 518 break; 519 case OpCodes.NODETYPE_ANYELEMENT : 520 targetStrings.add(PsuedoNames.PSEUDONAME_ANY); 521 break; 522 case OpCodes.NODETYPE_PI : 523 targetStrings.add(PsuedoNames.PSEUDONAME_ANY); 524 break; 525 default : 526 targetStrings.add(PsuedoNames.PSEUDONAME_ANY); 527 } 528 } 529 else 530 { 531 if (m_processor.tokenIs('@')) 532 { 533 tokPos++; 534 535 resetTokenMark(tokPos + 1); 536 } 537 538 if (m_processor.lookahead(':', 1)) 539 { 540 tokPos += 2; 541 } 542 543 targetStrings.add((String)m_compiler.getTokenQueue().elementAt(tokPos)); 544 } 545 } 546 547 /** 548 * Add a token to the token queue. 549 * 550 * 551 * @param s The token. 552 */ 553 private final void addToTokenQueue(String s) 554 { 555 m_compiler.getTokenQueue().addElement(s); 556 } 557 558 /** 559 * When a seperator token is found, see if there's a element name or 560 * the like to map. 561 * 562 * @param pat The XPath name string. 563 * @param startSubstring The start of the name string. 564 * @param posOfNSSep The position of the namespace seperator (':'). 565 * @param posOfScan The end of the name index. 566 * 567 * @throws javax.xml.transform.TransformerException 568 * 569 * @return -1 always. 570 */ 571 private int mapNSTokens(String pat, int startSubstring, int posOfNSSep, 572 int posOfScan) 573 throws javax.xml.transform.TransformerException 574 { 575 576 String prefix = ""; 577 578 if ((startSubstring >= 0) && (posOfNSSep >= 0)) 579 { 580 prefix = pat.substring(startSubstring, posOfNSSep); 581 } 582 String uName; 583 584 if ((null != m_namespaceContext) &&!prefix.equals("*") 585 &&!prefix.equals("xmlns")) 586 { 587 try 588 { 589 if (prefix.length() > 0) 590 uName = ((PrefixResolver) m_namespaceContext).getNamespaceForPrefix( 591 prefix); 592 else 593 { 594 595 // Assume last was wildcard. This is not legal according 596 // to the draft. Set the below to true to make namespace 597 // wildcards work. 598 if (false) 599 { 600 addToTokenQueue(":"); 601 602 String s = pat.substring(posOfNSSep + 1, posOfScan); 603 604 if (s.length() > 0) 605 addToTokenQueue(s); 606 607 return -1; 608 } 609 else 610 { 611 uName = 612 ((PrefixResolver) m_namespaceContext).getNamespaceForPrefix( 613 prefix); 614 } 615 } 616 } 617 catch (ClassCastException cce) 618 { 619 uName = m_namespaceContext.getNamespaceForPrefix(prefix); 620 } 621 } 622 else 623 { 624 uName = prefix; 625 } 626 627 if ((null != uName) && (uName.length() > 0)) 628 { 629 addToTokenQueue(uName); 630 addToTokenQueue(":"); 631 632 String s = pat.substring(posOfNSSep + 1, posOfScan); 633 634 if (s.length() > 0) 635 addToTokenQueue(s); 636 } 637 else 638 { 639 m_processor.error(XPATHErrorResources.ER_PREFIX_MUST_RESOLVE, 640 new String[] {prefix}); //"Prefix must resolve to a namespace: {0}"; 641 } 642 643 return -1; 644 } 645 }