1 /*
   2  * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
   3  * @LastModified: Oct 2017
   4  */
   5 /*
   6  * Licensed to the Apache Software Foundation (ASF) under one or more
   7  * contributor license agreements.  See the NOTICE file distributed with
   8  * this work for additional information regarding copyright ownership.
   9  * The ASF licenses this file to You under the Apache License, Version 2.0
  10  * (the "License"); you may not use this file except in compliance with
  11  * the License.  You may obtain a copy of the License at
  12  *
  13  *      http://www.apache.org/licenses/LICENSE-2.0
  14  *
  15  * Unless required by applicable law or agreed to in writing, software
  16  * distributed under the License is distributed on an "AS IS" BASIS,
  17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  18  * See the License for the specific language governing permissions and
  19  * limitations under the License.
  20  */
  21 
  22 package com.sun.org.apache.xpath.internal.compiler;
  23 
  24 import com.sun.org.apache.xml.internal.utils.PrefixResolver;
  25 import com.sun.org.apache.xpath.internal.res.XPATHErrorResources;
  26 import java.util.List;
  27 
  28 /**
  29  * This class is in charge of lexical processing of the XPath
  30  * expression into tokens.
  31  */
  32 class Lexer
  33 {
  34 
  35   /**
  36    * The target XPath.
  37    */
  38   private Compiler m_compiler;
  39 
  40   /**
  41    * The prefix resolver to map prefixes to namespaces in the XPath.
  42    */
  43   PrefixResolver m_namespaceContext;
  44 
  45   /**
  46    * The XPath processor object.
  47    */
  48   XPathParser m_processor;
  49 
  50   /**
  51    * This value is added to each element name in the TARGETEXTRA
  52    * that is a 'target' (right-most top-level element name).
  53    */
  54   static final int TARGETEXTRA = 10000;
  55 
  56   /**
  57    * Ignore this, it is going away.
  58    * This holds a map to the m_tokenQueue that tells where the top-level elements are.
  59    * It is used for pattern matching so the m_tokenQueue can be walked backwards.
  60    * Each element that is a 'target', (right-most top level element name) has
  61    * TARGETEXTRA added to it.
  62    *
  63    */
  64   private int m_patternMap[] = new int[100];
  65 
  66   /**
  67    * Ignore this, it is going away.
  68    * The number of elements that m_patternMap maps;
  69    */
  70   private int m_patternMapSize;
  71 
  72   /**
  73    * Create a Lexer object.
  74    *
  75    * @param compiler The owning compiler for this lexer.
  76    * @param resolver The prefix resolver for mapping qualified name prefixes
  77    *                 to namespace URIs.
  78    * @param xpathProcessor The parser that is processing strings to opcodes.
  79    */
  80   Lexer(Compiler compiler, PrefixResolver resolver,
  81         XPathParser xpathProcessor)
  82   {
  83 
  84     m_compiler = compiler;
  85     m_namespaceContext = resolver;
  86     m_processor = xpathProcessor;
  87   }
  88 
  89   /**
  90    * Walk through the expression and build a token queue, and a map of the top-level
  91    * elements.
  92    * @param pat XSLT Expression.
  93    *
  94    * @throws javax.xml.transform.TransformerException
  95    */
  96   void tokenize(String pat) throws javax.xml.transform.TransformerException
  97   {
  98     tokenize(pat, null);
  99   }
 100 
 101   /**
 102    * Walk through the expression and build a token queue, and a map of the top-level
 103    * elements.
 104    * @param pat XSLT Expression.
 105    * @param targetStrings a list to hold Strings, may be null.
 106    *
 107    * @throws javax.xml.transform.TransformerException
 108    */
 109   @SuppressWarnings("fallthrough") // on purpose at case '-', '(' and default
 110   void tokenize(String pat, List<String> targetStrings)
 111           throws javax.xml.transform.TransformerException
 112   {
 113 
 114     m_compiler.m_currentPattern = pat;
 115     m_patternMapSize = 0;
 116 
 117     // This needs to grow too.
 118     m_compiler.m_opMap = new OpMapVector(OpMap.MAXTOKENQUEUESIZE * 5, OpMap.BLOCKTOKENQUEUESIZE * 5, OpMap.MAPINDEX_LENGTH);
 119 
 120     int nChars = pat.length();
 121     int startSubstring = -1;
 122     int posOfNSSep = -1;
 123     boolean isStartOfPat = true;
 124     boolean isAttrName = false;
 125     boolean isNum = false;
 126 
 127     // Nesting of '[' so we can know if the given element should be
 128     // counted inside the m_patternMap.
 129     int nesting = 0;
 130 
 131     // char[] chars = pat.toCharArray();
 132     for (int i = 0; i < nChars; i++)
 133     {
 134       char c = pat.charAt(i);
 135 
 136       switch (c)
 137       {
 138       case '\"' :
 139       {
 140         if (startSubstring != -1)
 141         {
 142           isNum = false;
 143           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
 144           isAttrName = false;
 145 
 146           if (-1 != posOfNSSep)
 147           {
 148             posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
 149           }
 150           else
 151           {
 152             addToTokenQueue(pat.substring(startSubstring, i));
 153           }
 154         }
 155 
 156         startSubstring = i;
 157 
 158         for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\"'); i++);
 159 
 160         if (c == '\"' && i < nChars)
 161         {
 162           addToTokenQueue(pat.substring(startSubstring, i + 1));
 163 
 164           startSubstring = -1;
 165         }
 166         else
 167         {
 168           m_processor.error(XPATHErrorResources.ER_EXPECTED_DOUBLE_QUOTE,
 169                             null);  //"misquoted literal... expected double quote!");
 170         }
 171       }
 172       break;
 173       case '\'' :
 174         if (startSubstring != -1)
 175         {
 176           isNum = false;
 177           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
 178           isAttrName = false;
 179 
 180           if (-1 != posOfNSSep)
 181           {
 182             posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
 183           }
 184           else
 185           {
 186             addToTokenQueue(pat.substring(startSubstring, i));
 187           }
 188         }
 189 
 190         startSubstring = i;
 191 
 192         for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\''); i++);
 193 
 194         if (c == '\'' && i < nChars)
 195         {
 196           addToTokenQueue(pat.substring(startSubstring, i + 1));
 197 
 198           startSubstring = -1;
 199         }
 200         else
 201         {
 202           m_processor.error(XPATHErrorResources.ER_EXPECTED_SINGLE_QUOTE,
 203                             null);  //"misquoted literal... expected single quote!");
 204         }
 205         break;
 206       case 0x0A :
 207       case 0x0D :
 208       case ' ' :
 209       case '\t' :
 210         if (startSubstring != -1)
 211         {
 212           isNum = false;
 213           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
 214           isAttrName = false;
 215 
 216           if (-1 != posOfNSSep)
 217           {
 218             posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
 219           }
 220           else
 221           {
 222             addToTokenQueue(pat.substring(startSubstring, i));
 223           }
 224 
 225           startSubstring = -1;
 226         }
 227         break;
 228       case '@' :
 229         isAttrName = true;
 230 
 231       // fall-through on purpose
 232       case '-' :
 233         if ('-' == c)
 234         {
 235           if (!(isNum || (startSubstring == -1)))
 236           {
 237             break;
 238           }
 239 
 240           isNum = false;
 241         }
 242 
 243       // fall-through on purpose
 244       case '(' :
 245       case '[' :
 246       case ')' :
 247       case ']' :
 248       case '|' :
 249       case '/' :
 250       case '*' :
 251       case '+' :
 252       case '=' :
 253       case ',' :
 254       case '\\' :  // Unused at the moment
 255       case '^' :  // Unused at the moment
 256       case '!' :  // Unused at the moment
 257       case '$' :
 258       case '<' :
 259       case '>' :
 260         if (startSubstring != -1)
 261         {
 262           isNum = false;
 263           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
 264           isAttrName = false;
 265 
 266           if (-1 != posOfNSSep)
 267           {
 268             posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
 269           }
 270           else
 271           {
 272             addToTokenQueue(pat.substring(startSubstring, i));
 273           }
 274 
 275           startSubstring = -1;
 276         }
 277         else if (('/' == c) && isStartOfPat)
 278         {
 279           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
 280         }
 281         else if ('*' == c)
 282         {
 283           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
 284           isAttrName = false;
 285         }
 286 
 287         if (0 == nesting)
 288         {
 289           if ('|' == c)
 290           {
 291             if (null != targetStrings)
 292             {
 293               recordTokenString(targetStrings);
 294             }
 295 
 296             isStartOfPat = true;
 297           }
 298         }
 299 
 300         if ((')' == c) || (']' == c))
 301         {
 302           nesting--;
 303         }
 304         else if (('(' == c) || ('[' == c))
 305         {
 306           nesting++;
 307         }
 308 
 309         addToTokenQueue(pat.substring(i, i + 1));
 310         break;
 311       case ':' :
 312         if (i>0)
 313         {
 314           if (posOfNSSep == (i - 1))
 315           {
 316             if (startSubstring != -1)
 317             {
 318               if (startSubstring < (i - 1))
 319                 addToTokenQueue(pat.substring(startSubstring, i - 1));
 320             }
 321 
 322             isNum = false;
 323             isAttrName = false;
 324             startSubstring = -1;
 325             posOfNSSep = -1;
 326 
 327             addToTokenQueue(pat.substring(i - 1, i + 1));
 328 
 329             break;
 330           }
 331           else
 332           {
 333             posOfNSSep = i;
 334           }
 335         }
 336 
 337       // fall through on purpose
 338       default :
 339         if (-1 == startSubstring)
 340         {
 341           startSubstring = i;
 342           isNum = Character.isDigit(c);
 343         }
 344         else if (isNum)
 345         {
 346           isNum = Character.isDigit(c);
 347         }
 348       }
 349     }
 350 
 351     if (startSubstring != -1)
 352     {
 353       isNum = false;
 354       isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
 355 
 356       if ((-1 != posOfNSSep) ||
 357          ((m_namespaceContext != null) && (m_namespaceContext.handlesNullPrefixes())))
 358       {
 359         posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, nChars);
 360       }
 361       else
 362       {
 363         addToTokenQueue(pat.substring(startSubstring, nChars));
 364       }
 365     }
 366 
 367     if (0 == m_compiler.getTokenQueueSize())
 368     {
 369       m_processor.error(XPATHErrorResources.ER_EMPTY_EXPRESSION, null);  //"Empty expression!");
 370     }
 371     else if (null != targetStrings)
 372     {
 373       recordTokenString(targetStrings);
 374     }
 375 
 376     m_processor.m_queueMark = 0;
 377   }
 378 
 379   /**
 380    * Record the current position on the token queue as long as
 381    * this is a top-level element.  Must be called before the
 382    * next token is added to the m_tokenQueue.
 383    *
 384    * @param nesting The nesting count for the pattern element.
 385    * @param isStart true if this is the start of a pattern.
 386    * @param isAttrName true if we have determined that this is an attribute name.
 387    *
 388    * @return true if this is the start of a pattern.
 389    */
 390   private boolean mapPatternElemPos(int nesting, boolean isStart,
 391                                     boolean isAttrName)
 392   {
 393 
 394     if (0 == nesting)
 395     {
 396       if(m_patternMapSize >= m_patternMap.length)
 397       {
 398         int patternMap[] = m_patternMap;
 399         int len = m_patternMap.length;
 400         m_patternMap = new int[m_patternMapSize + 100];
 401         System.arraycopy(patternMap, 0, m_patternMap, 0, len);
 402       }
 403       if (!isStart)
 404       {
 405         m_patternMap[m_patternMapSize - 1] -= TARGETEXTRA;
 406       }
 407       m_patternMap[m_patternMapSize] =
 408         (m_compiler.getTokenQueueSize() - (isAttrName ? 1 : 0)) + TARGETEXTRA;
 409 
 410       m_patternMapSize++;
 411 
 412       isStart = false;
 413     }
 414 
 415     return isStart;
 416   }
 417 
 418   /**
 419    * Given a map pos, return the corresponding token queue pos.
 420    *
 421    * @param i The index in the m_patternMap.
 422    *
 423    * @return the token queue position.
 424    */
 425   private int getTokenQueuePosFromMap(int i)
 426   {
 427 
 428     int pos = m_patternMap[i];
 429 
 430     return (pos >= TARGETEXTRA) ? (pos - TARGETEXTRA) : pos;
 431   }
 432 
 433   /**
 434    * Reset token queue mark and m_token to a
 435    * given position.
 436    * @param mark The new position.
 437    */
 438   private final void resetTokenMark(int mark)
 439   {
 440 
 441     int qsz = m_compiler.getTokenQueueSize();
 442 
 443     m_processor.m_queueMark = (mark > 0)
 444                               ? ((mark <= qsz) ? mark - 1 : mark) : 0;
 445 
 446     if (m_processor.m_queueMark < qsz)
 447     {
 448       m_processor.m_token =
 449         (String) m_compiler.getTokenQueue().elementAt(m_processor.m_queueMark++);
 450       m_processor.m_tokenChar = m_processor.m_token.charAt(0);
 451     }
 452     else
 453     {
 454       m_processor.m_token = null;
 455       m_processor.m_tokenChar = 0;
 456     }
 457   }
 458 
 459   /**
 460    * Given a string, return the corresponding keyword token.
 461    *
 462    * @param key The keyword.
 463    *
 464    * @return An opcode value.
 465    */
 466   final int getKeywordToken(String key)
 467   {
 468 
 469     int tok;
 470 
 471     try
 472     {
 473       Integer itok = (Integer) Keywords.getKeyWord(key);
 474 
 475       tok = (null != itok) ? itok.intValue() : 0;
 476     }
 477     catch (NullPointerException npe)
 478     {
 479       tok = 0;
 480     }
 481     catch (ClassCastException cce)
 482     {
 483       tok = 0;
 484     }
 485 
 486     return tok;
 487   }
 488 
 489   /**
 490    * Record the current token in the passed vector.
 491    *
 492    * @param targetStrings a list of strings.
 493    */
 494   private void recordTokenString(List<String> targetStrings)
 495   {
 496 
 497     int tokPos = getTokenQueuePosFromMap(m_patternMapSize - 1);
 498 
 499     resetTokenMark(tokPos + 1);
 500 
 501     if (m_processor.lookahead('(', 1))
 502     {
 503       int tok = getKeywordToken(m_processor.m_token);
 504 
 505       switch (tok)
 506       {
 507       case OpCodes.NODETYPE_COMMENT :
 508         targetStrings.add(PsuedoNames.PSEUDONAME_COMMENT);
 509         break;
 510       case OpCodes.NODETYPE_TEXT :
 511         targetStrings.add(PsuedoNames.PSEUDONAME_TEXT);
 512         break;
 513       case OpCodes.NODETYPE_NODE :
 514         targetStrings.add(PsuedoNames.PSEUDONAME_ANY);
 515         break;
 516       case OpCodes.NODETYPE_ROOT :
 517         targetStrings.add(PsuedoNames.PSEUDONAME_ROOT);
 518         break;
 519       case OpCodes.NODETYPE_ANYELEMENT :
 520         targetStrings.add(PsuedoNames.PSEUDONAME_ANY);
 521         break;
 522       case OpCodes.NODETYPE_PI :
 523         targetStrings.add(PsuedoNames.PSEUDONAME_ANY);
 524         break;
 525       default :
 526         targetStrings.add(PsuedoNames.PSEUDONAME_ANY);
 527       }
 528     }
 529     else
 530     {
 531       if (m_processor.tokenIs('@'))
 532       {
 533         tokPos++;
 534 
 535         resetTokenMark(tokPos + 1);
 536       }
 537 
 538       if (m_processor.lookahead(':', 1))
 539       {
 540         tokPos += 2;
 541       }
 542 
 543       targetStrings.add((String)m_compiler.getTokenQueue().elementAt(tokPos));
 544     }
 545   }
 546 
 547   /**
 548    * Add a token to the token queue.
 549    *
 550    *
 551    * @param s The token.
 552    */
 553   private final void addToTokenQueue(String s)
 554   {
 555     m_compiler.getTokenQueue().addElement(s);
 556   }
 557 
 558   /**
 559    * When a seperator token is found, see if there's a element name or
 560    * the like to map.
 561    *
 562    * @param pat The XPath name string.
 563    * @param startSubstring The start of the name string.
 564    * @param posOfNSSep The position of the namespace seperator (':').
 565    * @param posOfScan The end of the name index.
 566    *
 567    * @throws javax.xml.transform.TransformerException
 568    *
 569    * @return -1 always.
 570    */
 571   private int mapNSTokens(String pat, int startSubstring, int posOfNSSep,
 572                           int posOfScan)
 573            throws javax.xml.transform.TransformerException
 574  {
 575 
 576     String prefix = "";
 577 
 578     if ((startSubstring >= 0) && (posOfNSSep >= 0))
 579     {
 580        prefix = pat.substring(startSubstring, posOfNSSep);
 581     }
 582     String uName;
 583 
 584     if ((null != m_namespaceContext) &&!prefix.equals("*")
 585             &&!prefix.equals("xmlns"))
 586     {
 587       try
 588       {
 589         if (prefix.length() > 0)
 590           uName = ((PrefixResolver) m_namespaceContext).getNamespaceForPrefix(
 591             prefix);
 592         else
 593         {
 594 
 595           // Assume last was wildcard. This is not legal according
 596           // to the draft. Set the below to true to make namespace
 597           // wildcards work.
 598           if (false)
 599           {
 600             addToTokenQueue(":");
 601 
 602             String s = pat.substring(posOfNSSep + 1, posOfScan);
 603 
 604             if (s.length() > 0)
 605               addToTokenQueue(s);
 606 
 607             return -1;
 608           }
 609           else
 610           {
 611             uName =
 612               ((PrefixResolver) m_namespaceContext).getNamespaceForPrefix(
 613                 prefix);
 614           }
 615         }
 616       }
 617       catch (ClassCastException cce)
 618       {
 619         uName = m_namespaceContext.getNamespaceForPrefix(prefix);
 620       }
 621     }
 622     else
 623     {
 624       uName = prefix;
 625     }
 626 
 627     if ((null != uName) && (uName.length() > 0))
 628     {
 629       addToTokenQueue(uName);
 630       addToTokenQueue(":");
 631 
 632       String s = pat.substring(posOfNSSep + 1, posOfScan);
 633 
 634       if (s.length() > 0)
 635         addToTokenQueue(s);
 636     }
 637     else
 638     {
 639         m_processor.error(XPATHErrorResources.ER_PREFIX_MUST_RESOLVE,
 640                 new String[] {prefix});  //"Prefix must resolve to a namespace: {0}";
 641     }
 642 
 643     return -1;
 644   }
 645 }