1 /*
   2  * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
   3  */
   4 /*
   5  * Licensed to the Apache Software Foundation (ASF) under one or more
   6  * contributor license agreements.  See the NOTICE file distributed with
   7  * this work for additional information regarding copyright ownership.
   8  * The ASF licenses this file to You under the Apache License, Version 2.0
   9  * (the "License"); you may not use this file except in compliance with
  10  * the License.  You may obtain a copy of the License at
  11  *
  12  *      http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 
  21 package com.sun.org.apache.xpath.internal.compiler;
  22 
  23 import com.sun.org.apache.xml.internal.utils.PrefixResolver;
  24 import com.sun.org.apache.xpath.internal.res.XPATHErrorResources;
  25 import java.util.List;
  26 
  27 /**
  28  * This class is in charge of lexical processing of the XPath
  29  * expression into tokens.
  30  *
  31  * @LastModified: Nov 2017
  32  */
  33 class Lexer
  34 {
  35 
  36   /**
  37    * The target XPath.
  38    */
  39   private Compiler m_compiler;
  40 
  41   /**
  42    * The prefix resolver to map prefixes to namespaces in the XPath.
  43    */
  44   PrefixResolver m_namespaceContext;
  45 
  46   /**
  47    * The XPath processor object.
  48    */
  49   XPathParser m_processor;
  50 
  51   /**
  52    * This value is added to each element name in the TARGETEXTRA
  53    * that is a 'target' (right-most top-level element name).
  54    */
  55   static final int TARGETEXTRA = 10000;
  56 
  57   /**
  58    * Ignore this, it is going away.
  59    * This holds a map to the m_tokenQueue that tells where the top-level elements are.
  60    * It is used for pattern matching so the m_tokenQueue can be walked backwards.
  61    * Each element that is a 'target', (right-most top level element name) has
  62    * TARGETEXTRA added to it.
  63    *
  64    */
  65   private int m_patternMap[] = new int[100];
  66 
  67   /**
  68    * Ignore this, it is going away.
  69    * The number of elements that m_patternMap maps;
  70    */
  71   private int m_patternMapSize;
  72 
  73   /**
  74    * Create a Lexer object.
  75    *
  76    * @param compiler The owning compiler for this lexer.
  77    * @param resolver The prefix resolver for mapping qualified name prefixes
  78    *                 to namespace URIs.
  79    * @param xpathProcessor The parser that is processing strings to opcodes.
  80    */
  81   Lexer(Compiler compiler, PrefixResolver resolver,
  82         XPathParser xpathProcessor)
  83   {
  84 
  85     m_compiler = compiler;
  86     m_namespaceContext = resolver;
  87     m_processor = xpathProcessor;
  88   }
  89 
  90   /**
  91    * Walk through the expression and build a token queue, and a map of the top-level
  92    * elements.
  93    * @param pat XSLT Expression.
  94    *
  95    * @throws javax.xml.transform.TransformerException
  96    */
  97   void tokenize(String pat) throws javax.xml.transform.TransformerException
  98   {
  99     tokenize(pat, null);
 100   }
 101 
 102   /**
 103    * Walk through the expression and build a token queue, and a map of the top-level
 104    * elements.
 105    * @param pat XSLT Expression.
 106    * @param targetStrings a list to hold Strings, may be null.
 107    *
 108    * @throws javax.xml.transform.TransformerException
 109    */
 110   @SuppressWarnings("fallthrough") // on purpose at case '-', '(' and default
 111   void tokenize(String pat, List<String> targetStrings)
 112           throws javax.xml.transform.TransformerException
 113   {
 114 
 115     m_compiler.m_currentPattern = pat;
 116     m_patternMapSize = 0;
 117 
 118     // This needs to grow too.
 119     m_compiler.m_opMap = new OpMapVector(OpMap.MAXTOKENQUEUESIZE * 5, OpMap.BLOCKTOKENQUEUESIZE * 5, OpMap.MAPINDEX_LENGTH);
 120 
 121     int nChars = pat.length();
 122     int startSubstring = -1;
 123     int posOfNSSep = -1;
 124     boolean isStartOfPat = true;
 125     boolean isAttrName = false;
 126     boolean isNum = false;
 127 
 128     // Nesting of '[' so we can know if the given element should be
 129     // counted inside the m_patternMap.
 130     int nesting = 0;
 131 
 132     // char[] chars = pat.toCharArray();
 133     for (int i = 0; i < nChars; i++)
 134     {
 135       char c = pat.charAt(i);
 136 
 137       switch (c)
 138       {
 139       case '\"' :
 140       {
 141         if (startSubstring != -1)
 142         {
 143           isNum = false;
 144           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
 145           isAttrName = false;
 146 
 147           if (-1 != posOfNSSep)
 148           {
 149             posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
 150           }
 151           else
 152           {
 153             addToTokenQueue(pat.substring(startSubstring, i));
 154           }
 155         }
 156 
 157         startSubstring = i;
 158 
 159         for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\"'); i++);
 160 
 161         if (c == '\"' && i < nChars)
 162         {
 163           addToTokenQueue(pat.substring(startSubstring, i + 1));
 164 
 165           startSubstring = -1;
 166         }
 167         else
 168         {
 169           m_processor.error(XPATHErrorResources.ER_EXPECTED_DOUBLE_QUOTE,
 170                             null);  //"misquoted literal... expected double quote!");
 171         }
 172       }
 173       break;
 174       case '\'' :
 175         if (startSubstring != -1)
 176         {
 177           isNum = false;
 178           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
 179           isAttrName = false;
 180 
 181           if (-1 != posOfNSSep)
 182           {
 183             posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
 184           }
 185           else
 186           {
 187             addToTokenQueue(pat.substring(startSubstring, i));
 188           }
 189         }
 190 
 191         startSubstring = i;
 192 
 193         for (i++; (i < nChars) && ((c = pat.charAt(i)) != '\''); i++);
 194 
 195         if (c == '\'' && i < nChars)
 196         {
 197           addToTokenQueue(pat.substring(startSubstring, i + 1));
 198 
 199           startSubstring = -1;
 200         }
 201         else
 202         {
 203           m_processor.error(XPATHErrorResources.ER_EXPECTED_SINGLE_QUOTE,
 204                             null);  //"misquoted literal... expected single quote!");
 205         }
 206         break;
 207       case 0x0A :
 208       case 0x0D :
 209       case ' ' :
 210       case '\t' :
 211         if (startSubstring != -1)
 212         {
 213           isNum = false;
 214           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
 215           isAttrName = false;
 216 
 217           if (-1 != posOfNSSep)
 218           {
 219             posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
 220           }
 221           else
 222           {
 223             addToTokenQueue(pat.substring(startSubstring, i));
 224           }
 225 
 226           startSubstring = -1;
 227         }
 228         break;
 229       case '@' :
 230         isAttrName = true;
 231 
 232       // fall-through on purpose
 233       case '-' :
 234         if ('-' == c)
 235         {
 236           if (!(isNum || (startSubstring == -1)))
 237           {
 238             break;
 239           }
 240 
 241           isNum = false;
 242         }
 243 
 244       // fall-through on purpose
 245       case '(' :
 246       case '[' :
 247       case ')' :
 248       case ']' :
 249       case '|' :
 250       case '/' :
 251       case '*' :
 252       case '+' :
 253       case '=' :
 254       case ',' :
 255       case '\\' :  // Unused at the moment
 256       case '^' :  // Unused at the moment
 257       case '!' :  // Unused at the moment
 258       case '$' :
 259       case '<' :
 260       case '>' :
 261         if (startSubstring != -1)
 262         {
 263           isNum = false;
 264           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
 265           isAttrName = false;
 266 
 267           if (-1 != posOfNSSep)
 268           {
 269             posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, i);
 270           }
 271           else
 272           {
 273             addToTokenQueue(pat.substring(startSubstring, i));
 274           }
 275 
 276           startSubstring = -1;
 277         }
 278         else if (('/' == c) && isStartOfPat)
 279         {
 280           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
 281         }
 282         else if ('*' == c)
 283         {
 284           isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
 285           isAttrName = false;
 286         }
 287 
 288         if (0 == nesting)
 289         {
 290           if ('|' == c)
 291           {
 292             if (null != targetStrings)
 293             {
 294               recordTokenString(targetStrings);
 295             }
 296 
 297             isStartOfPat = true;
 298           }
 299         }
 300 
 301         if ((')' == c) || (']' == c))
 302         {
 303           nesting--;
 304         }
 305         else if (('(' == c) || ('[' == c))
 306         {
 307           nesting++;
 308         }
 309 
 310         addToTokenQueue(pat.substring(i, i + 1));
 311         break;
 312       case ':' :
 313         if (i>0)
 314         {
 315           if (posOfNSSep == (i - 1))
 316           {
 317             if (startSubstring != -1)
 318             {
 319               if (startSubstring < (i - 1))
 320                 addToTokenQueue(pat.substring(startSubstring, i - 1));
 321             }
 322 
 323             isNum = false;
 324             isAttrName = false;
 325             startSubstring = -1;
 326             posOfNSSep = -1;
 327 
 328             addToTokenQueue(pat.substring(i - 1, i + 1));
 329 
 330             break;
 331           }
 332           else
 333           {
 334             posOfNSSep = i;
 335           }
 336         }
 337 
 338       // fall through on purpose
 339       default :
 340         if (-1 == startSubstring)
 341         {
 342           startSubstring = i;
 343           isNum = Character.isDigit(c);
 344         }
 345         else if (isNum)
 346         {
 347           isNum = Character.isDigit(c);
 348         }
 349       }
 350     }
 351 
 352     if (startSubstring != -1)
 353     {
 354       isNum = false;
 355       isStartOfPat = mapPatternElemPos(nesting, isStartOfPat, isAttrName);
 356 
 357       if ((-1 != posOfNSSep) ||
 358          ((m_namespaceContext != null) && (m_namespaceContext.handlesNullPrefixes())))
 359       {
 360         posOfNSSep = mapNSTokens(pat, startSubstring, posOfNSSep, nChars);
 361       }
 362       else
 363       {
 364         addToTokenQueue(pat.substring(startSubstring, nChars));
 365       }
 366     }
 367 
 368     if (0 == m_compiler.getTokenQueueSize())
 369     {
 370       m_processor.error(XPATHErrorResources.ER_EMPTY_EXPRESSION, null);  //"Empty expression!");
 371     }
 372     else if (null != targetStrings)
 373     {
 374       recordTokenString(targetStrings);
 375     }
 376 
 377     m_processor.m_queueMark = 0;
 378   }
 379 
 380   /**
 381    * Record the current position on the token queue as long as
 382    * this is a top-level element.  Must be called before the
 383    * next token is added to the m_tokenQueue.
 384    *
 385    * @param nesting The nesting count for the pattern element.
 386    * @param isStart true if this is the start of a pattern.
 387    * @param isAttrName true if we have determined that this is an attribute name.
 388    *
 389    * @return true if this is the start of a pattern.
 390    */
 391   private boolean mapPatternElemPos(int nesting, boolean isStart,
 392                                     boolean isAttrName)
 393   {
 394 
 395     if (0 == nesting)
 396     {
 397       if(m_patternMapSize >= m_patternMap.length)
 398       {
 399         int patternMap[] = m_patternMap;
 400         int len = m_patternMap.length;
 401         m_patternMap = new int[m_patternMapSize + 100];
 402         System.arraycopy(patternMap, 0, m_patternMap, 0, len);
 403       }
 404       if (!isStart)
 405       {
 406         m_patternMap[m_patternMapSize - 1] -= TARGETEXTRA;
 407       }
 408       m_patternMap[m_patternMapSize] =
 409         (m_compiler.getTokenQueueSize() - (isAttrName ? 1 : 0)) + TARGETEXTRA;
 410 
 411       m_patternMapSize++;
 412 
 413       isStart = false;
 414     }
 415 
 416     return isStart;
 417   }
 418 
 419   /**
 420    * Given a map pos, return the corresponding token queue pos.
 421    *
 422    * @param i The index in the m_patternMap.
 423    *
 424    * @return the token queue position.
 425    */
 426   private int getTokenQueuePosFromMap(int i)
 427   {
 428 
 429     int pos = m_patternMap[i];
 430 
 431     return (pos >= TARGETEXTRA) ? (pos - TARGETEXTRA) : pos;
 432   }
 433 
 434   /**
 435    * Reset token queue mark and m_token to a
 436    * given position.
 437    * @param mark The new position.
 438    */
 439   private final void resetTokenMark(int mark)
 440   {
 441 
 442     int qsz = m_compiler.getTokenQueueSize();
 443 
 444     m_processor.m_queueMark = (mark > 0)
 445                               ? ((mark <= qsz) ? mark - 1 : mark) : 0;
 446 
 447     if (m_processor.m_queueMark < qsz)
 448     {
 449       m_processor.m_token =
 450         (String) m_compiler.getTokenQueue().elementAt(m_processor.m_queueMark++);
 451       m_processor.m_tokenChar = m_processor.m_token.charAt(0);
 452     }
 453     else
 454     {
 455       m_processor.m_token = null;
 456       m_processor.m_tokenChar = 0;
 457     }
 458   }
 459 
 460   /**
 461    * Given a string, return the corresponding keyword token.
 462    *
 463    * @param key The keyword.
 464    *
 465    * @return An opcode value.
 466    */
 467   final int getKeywordToken(String key)
 468   {
 469 
 470     int tok;
 471 
 472     try
 473     {
 474       Integer itok = Keywords.getKeyWord(key);
 475 
 476       tok = (null != itok) ? itok.intValue() : 0;
 477     }
 478     catch (NullPointerException npe)
 479     {
 480       tok = 0;
 481     }
 482     catch (ClassCastException cce)
 483     {
 484       tok = 0;
 485     }
 486 
 487     return tok;
 488   }
 489 
 490   /**
 491    * Record the current token in the passed vector.
 492    *
 493    * @param targetStrings a list of strings.
 494    */
 495   private void recordTokenString(List<String> targetStrings)
 496   {
 497 
 498     int tokPos = getTokenQueuePosFromMap(m_patternMapSize - 1);
 499 
 500     resetTokenMark(tokPos + 1);
 501 
 502     if (m_processor.lookahead('(', 1))
 503     {
 504       int tok = getKeywordToken(m_processor.m_token);
 505 
 506       switch (tok)
 507       {
 508       case OpCodes.NODETYPE_COMMENT :
 509         targetStrings.add(PsuedoNames.PSEUDONAME_COMMENT);
 510         break;
 511       case OpCodes.NODETYPE_TEXT :
 512         targetStrings.add(PsuedoNames.PSEUDONAME_TEXT);
 513         break;
 514       case OpCodes.NODETYPE_NODE :
 515         targetStrings.add(PsuedoNames.PSEUDONAME_ANY);
 516         break;
 517       case OpCodes.NODETYPE_ROOT :
 518         targetStrings.add(PsuedoNames.PSEUDONAME_ROOT);
 519         break;
 520       case OpCodes.NODETYPE_ANYELEMENT :
 521         targetStrings.add(PsuedoNames.PSEUDONAME_ANY);
 522         break;
 523       case OpCodes.NODETYPE_PI :
 524         targetStrings.add(PsuedoNames.PSEUDONAME_ANY);
 525         break;
 526       default :
 527         targetStrings.add(PsuedoNames.PSEUDONAME_ANY);
 528       }
 529     }
 530     else
 531     {
 532       if (m_processor.tokenIs('@'))
 533       {
 534         tokPos++;
 535 
 536         resetTokenMark(tokPos + 1);
 537       }
 538 
 539       if (m_processor.lookahead(':', 1))
 540       {
 541         tokPos += 2;
 542       }
 543 
 544       targetStrings.add((String)m_compiler.getTokenQueue().elementAt(tokPos));
 545     }
 546   }
 547 
 548   /**
 549    * Add a token to the token queue.
 550    *
 551    *
 552    * @param s The token.
 553    */
 554   private final void addToTokenQueue(String s)
 555   {
 556     m_compiler.getTokenQueue().addElement(s);
 557   }
 558 
 559   /**
 560    * When a seperator token is found, see if there's a element name or
 561    * the like to map.
 562    *
 563    * @param pat The XPath name string.
 564    * @param startSubstring The start of the name string.
 565    * @param posOfNSSep The position of the namespace seperator (':').
 566    * @param posOfScan The end of the name index.
 567    *
 568    * @throws javax.xml.transform.TransformerException
 569    *
 570    * @return -1 always.
 571    */
 572   private int mapNSTokens(String pat, int startSubstring, int posOfNSSep,
 573                           int posOfScan)
 574            throws javax.xml.transform.TransformerException
 575  {
 576 
 577     String prefix = "";
 578 
 579     if ((startSubstring >= 0) && (posOfNSSep >= 0))
 580     {
 581        prefix = pat.substring(startSubstring, posOfNSSep);
 582     }
 583     String uName;
 584 
 585     if ((null != m_namespaceContext) &&!prefix.equals("*")
 586             &&!prefix.equals("xmlns"))
 587     {
 588       try
 589       {
 590         if (prefix.length() > 0)
 591           uName = m_namespaceContext.getNamespaceForPrefix(prefix);
 592         else
 593         {
 594 
 595           // Assume last was wildcard. This is not legal according
 596           // to the draft. Set the below to true to make namespace
 597           // wildcards work.
 598           if (false)
 599           {
 600             addToTokenQueue(":");
 601 
 602             String s = pat.substring(posOfNSSep + 1, posOfScan);
 603 
 604             if (s.length() > 0)
 605               addToTokenQueue(s);
 606 
 607             return -1;
 608           }
 609           else
 610           {
 611             uName = m_namespaceContext.getNamespaceForPrefix(prefix);
 612           }
 613         }
 614       }
 615       catch (ClassCastException cce)
 616       {
 617         uName = m_namespaceContext.getNamespaceForPrefix(prefix);
 618       }
 619     }
 620     else
 621     {
 622       uName = prefix;
 623     }
 624 
 625     if ((null != uName) && (uName.length() > 0))
 626     {
 627       addToTokenQueue(uName);
 628       addToTokenQueue(":");
 629 
 630       String s = pat.substring(posOfNSSep + 1, posOfScan);
 631 
 632       if (s.length() > 0)
 633         addToTokenQueue(s);
 634     }
 635     else
 636     {
 637         m_processor.error(XPATHErrorResources.ER_PREFIX_MUST_RESOLVE,
 638                 new String[] {prefix});  //"Prefix must resolve to a namespace: {0}";
 639     }
 640 
 641     return -1;
 642   }
 643 }