1 /*
   2  * Copyright (c) 1999, 2001, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 /*
  26  * COMPONENT_NAME: idl.parser
  27  *
  28  * ORIGINS: 27
  29  *
  30  * Licensed Materials - Property of IBM
  31  * 5639-D57 (C) COPYRIGHT International Business Machines Corp. 1997, 1999
  32  * RMI-IIOP v1.0
  33  *
  34  */
  35 
  36 package com.sun.tools.corba.se.idl;
  37 
// NOTES:
// -F46082.51<daz> Remove -stateful feature.
// -D59166<daz> Add support for keyword/identifier collision detection.  This
//  feature is implemented here, rather than in class Scanner, to allow the
//  Parser to handle the problem.
// -F60858.1<daz> Support -corba option, level <= 2.2: identify 2.3 keywords.
// -D62023<daz> Support -corba option, level <= 2.3, identify 2.4 keywords.
// KMC Support -corba, level <= 3.0.  Added 3.0 keywords.
//
// Should an escaped Identifier be a type rather than an attribute?
//
  49 
  50 /**
  51  * Class Token represents a lexeme appearing within an IDL source.  Every
  52  * Token has a type.  Depending on its type and on the supported version
  53  * of IDL, a Token will have other associated attributes, such as a name
  54  * (identifier, e.g.), and whether it is escaped, deprecated, or is a type
  55  * that is known to be in a future version of IDL.
  56  **/
  57 class Token
  58 {
  59   ///////////////
  60   // Available types
  61 
  62   static final int                // Keywords
  63       Any                  =   0, // 2.2
  64       Attribute            =   1, // |
  65       Boolean              =   2, // .
  66       Case                 =   3, // .
  67       Char                 =   4, // .
  68       Const                =   5,
  69       Context              =   6,
  70       Default              =   7,
  71       Double               =   8,
  72       Enum                 =   9,
  73       Exception            =  10,
  74       FALSE                =  11,
  75       Fixed                =  12, // New addition
  76       Float                =  13,
  77       In                   =  14,
  78       Inout                =  15,
  79       Interface            =  16,
  80       Long                 =  17,
  81       Module               =  18,
  82       Native               =  19, // New addition
  83       Object               =  20,
  84       Octet                =  21,
  85       Oneway               =  22,
  86       Out                  =  23,
  87       Raises               =  24,
  88       Readonly             =  25,
  89       Sequence             =  26,
  90       Short                =  27,
  91       String               =  28,
  92       Struct               =  29,
  93       Switch               =  30,
  94       TRUE                 =  31,
  95       Typedef              =  32,
  96       Unsigned             =  33, // .
  97       Union                =  34, // .
  98       Void                 =  35, // .
  99       Wchar                =  36, // |
 100       Wstring              =  37, // 2.2
 101       // <f46082.40> New OBV keywords...
 102       // <d62023> In 2.4rtf, "factory" is synonymous to "init" in 2.3
 103       Init                 =  38, // 2.3 only
 104       Abstract             =  39, // 2.3        2.4rtf
 105       Custom               =  40, // |          |
 106       Private              =  41, // |          |
 107       Public               =  42, // |          |
 108       Supports             =  43, // |          |
 109       Truncatable          =  44, // |          |
 110       ValueBase            =  45, // |          |
 111       Valuetype            =  46, // 2.3        2.4rtf
 112       Factory              =  47, //            2.4rtf only
 113 
 114       // Keywords in CORBA 3.0
 115       Component            =  48,
 116       Consumes             =  49,
 117       Emits                =  50,
 118       Finder               =  51,
 119       GetRaises            =  52,
 120       Home                 =  53,
 121       Import               =  54,
 122       Local                =  55,
 123       Manages              =  56,
 124       Multiple             =  57,
 125       PrimaryKey           =  58,
 126       Provides             =  59,
 127       Publishes            =  60,
 128       SetRaises            =  61,
 129       TypeId               =  62,
 130       TypePrefix           =  63,
 131       Uses                 =  64,
 132 
 133       Identifier           =  80, // Identifier
 134       MacroIdentifier      =  81, // Macro Identifier
 135 
 136       Semicolon            = 100, // Symbols
 137       LeftBrace            = 101,
 138       RightBrace           = 102,
 139       Colon                = 103,
 140       Comma                = 104,
 141       Equal                = 105,
 142       Plus                 = 106,
 143       Minus                = 107,
 144       LeftParen            = 108,
 145       RightParen           = 109,
 146       LessThan             = 110,
 147       GreaterThan          = 111,
 148       LeftBracket          = 112,
 149       RightBracket         = 113,
 150       Apostrophe           = 114,
 151       Quote                = 115,
 152       Backslash            = 116,
 153       Bar                  = 117,
 154       Carat                = 118,
 155       Ampersand            = 119,
 156       Star                 = 120,
 157       Slash                = 121,
 158       Percent              = 122,
 159       Tilde                = 123,
 160       DoubleColon          = 124,
 161       ShiftLeft            = 125,
 162       ShiftRight           = 126,
 163       Period               = 127,
 164       Hash                 = 128,
 165       Exclamation          = 129,
 166       DoubleEqual          = 130,
 167       NotEqual             = 131,
 168       GreaterEqual         = 132,
 169       LessEqual            = 133,
 170       DoubleBar            = 134,
 171       DoubleAmpersand      = 135,
 172 
 173       BooleanLiteral       = 200, // Literals
 174       CharacterLiteral     = 201,
 175       IntegerLiteral       = 202,
 176       FloatingPointLiteral = 203,
 177       StringLiteral        = 204,
 178       Literal              = 205,
 179 
 180       Define               = 300, // Directives
 181       Undef                = 301,
 182       If                   = 302,
 183       Ifdef                = 303,
 184       Ifndef               = 304,
 185       Else                 = 305,
 186       Elif                 = 306,
 187       Include              = 307,
 188       Endif                = 308,
 189       Line                 = 309,
 190       Error                = 310,
 191       Pragma               = 311,
 192       Null                 = 312,
 193       Unknown              = 313,
 194 
 195       Defined              = 400,
 196 
 197       // <f46082.40> Keyword identifiers.
 198       //Abstract             = 500,
 199       //Custom               = 501,
 200       //Init                 = 502,
 201       //Private2             = 503,
 202       //Public2              = 504,
 203       //Supports             = 505,
 204       //Truncatable          = 506,
 205       //ValueBase            = 507,
 206       //Valuetype            = 508,
 207 
 208       EOF                  = 999; // End of Input
 209 
 210   // Available types
 211   ///////////////
 212   // Keywords
 213 
 214   static final String [] Keywords = {
 215       "any",         "attribute",    "boolean",
 216       "case",        "char",         "const",
 217       "context",     "default",      "double",
 218       "enum",        "exception",    "FALSE",      "fixed",
 219       "float",       "in",           "inout",
 220       "interface",   "long",         "module",     "native",
 221       "Object",      "octet",        "oneway",
 222       "out",         "raises",       "readonly",
 223       "sequence",    "short",        "string",
 224       "struct",      "switch",       "TRUE",
 225       "typedef",     "unsigned",     "union",
 226       "void",        "wchar",        "wstring",
 227       "init", // In 2.3 only
 228       "abstract",     "custom",      "private",      // 2.3 and 2.4rtf
 229       "public",       "supports",    "truncatable",
 230       "ValueBase",    "valuetype",
 231       "factory",  // In 2.4rtf only
 232       // CORBA 3.0 keywords
 233       "component",      "consumes",     "emits",
 234       "finder",         "getRaises",    "home",
 235       "import",         "local",        "manages",
 236       "multiple",       "primaryKey",   "provides",
 237       "publishes",      "setRaises",    "supports",
 238       "typeId",         "typePrefix",   "uses" } ;
 239 
 240   // <f46082.40> Remove keyword identifiers.
 241   //static final int
 242   //    FirstKeywordIdentifier = 500,
 243   //    LastKeywordIdentifier  = Valuetype;
 244   //
 245   //static final String[] KeywordIdentifiers = {
 246   //    "abstract",    "custom",    "init",
 247   //    "private",     "public",    "supports",
 248   //    "truncatable", "valueBase", "valuetype"};
 249 
 250   /**
 251    * Determine whether this token is a keyword.
 252    * @return true iff this token is a keyword.
 253    **/
 254   boolean isKeyword ()
 255   {
 256     return type >= FirstKeyword && type <= LastKeyword;
 257   } // isKeyword
 258 
 259   private static final int
 260       FirstKeyword = Any, // 0
 261       LastKeyword  = Uses;
 262 
 263   // <f60858.1> Keywords in CORBA 2.2 that we support.
 264   private static final int
 265       First22Keyword = Any, // 0
 266       Last22Keyword  = Wstring;
 267 
 268   // <f60858.1> New keywords in CORBA 2.3 (preliminary) that we support.
 269   private static final int
 270       First23Keyword = Init,
 271       Last23Keyword  = Valuetype;
 272 
 273   // <d62023> New keywords in CORBA 2.4rtf (accepted 2.3) that we support.
 274   // Note that "factory" replaces "init".  Scanner must account for this in
 275   // keyword scan.
 276   private static final int
 277       First24rtfKeyword = Abstract,
 278       Last24rtfKeyword  = Factory;
 279 
 280   // New keywords in CORBA 3.0 (from CORBA components v. 1)
 281   private static final int
 282       First30Keyword    = Component,
 283       Last30Keyword     = Uses;
 284 
 285   // Current valid CORBA levels:
 286   // 2.2 (or <2.3): the default: no OBV support
 287   // 2.3: add OBV with init
 288   // >2.3: OBV with init replcaed by factory
 289   // 3.0: adds components, attr exceptions, local interfaces, type repository
 290   //      decls.
 291 
 292   private static final int CORBA_LEVEL_22 = 0 ;
 293   private static final int CORBA_LEVEL_23 = 1 ;
 294   private static final int CORBA_LEVEL_24RTF = 2 ;
 295   private static final int CORBA_LEVEL_30 = 3 ;
 296 
 297   // Do the conversion from a floating point CORBA level to an int
 298   private static int getLevel( float cLevel )
 299   {
 300     if (cLevel < 2.3f)
 301         return CORBA_LEVEL_22 ;
 302     if (Util.absDelta( cLevel, 2.3f ) < 0.001f)
 303         return CORBA_LEVEL_23 ;
 304     if (cLevel < 3.0f)
 305         return CORBA_LEVEL_24RTF ;
 306     return CORBA_LEVEL_30 ;
 307   }
 308 
 309   // Return the last keyword corresponding to a particular CORBA level
 310   private static int getLastKeyword( int level )
 311   {
 312     if (level == CORBA_LEVEL_22)
 313         return Last22Keyword ;
 314     if (level == CORBA_LEVEL_23)
 315         return Last23Keyword ;
 316     if (level == CORBA_LEVEL_24RTF)
 317         return Last24rtfKeyword ;
 318     return Last30Keyword ;
 319   }
 320 
 321   /** Create a keyword token from a string.
 322   * Determines whether the string is an IDL keyword based on the corbaLevel.
 323   * Strings that are keywords at higher CORBA levels than the corbaLevel
 324   * argument create identifier tokens that are marked as "collidesWithKeyword", unless
 325   * escapedOK is FALSE, which is the case only when preprocessing is taking place.
 326   * In the case of the "init" keyword, which was only defined in CORBA 2.3, init is
 327   * marked deprecated in CORBA 2.3 since it is not supported in higher levels.
 328   * @param String string The string we are converting to a token.
 329   * @param float corbaLevel The CORBA level, currently in the interval [2.2, 3.0].
 330   * @param boolean escapedOK Flag set true if _ is used to escape an IDL keyword for use
 331   * as an identifier.
 332   * @param boolean[] collidesWithKeyword is an array containing one value: a flag
 333   * representing whether this string is an identifier that collides with a keyword.
 334   * This is set by this method.
 335   * @return Token The resulting Token corresponding to string.
 336   */
 337   public static Token makeKeywordToken(
 338     String string, float corbaLevel, boolean escapedOK, boolean[] collision )
 339   {
 340     int level = getLevel( corbaLevel ) ;
 341     int lastKeyword = getLastKeyword( level ) ;
 342     boolean deprecated = false ;
 343     collision[0] = false ;
 344 
 345     // If the string is a keyword token, return that token
 346     for (int i = Token.FirstKeyword; i <= Token.LastKeyword; ++i) {
 347         if (string.equals (Token.Keywords[i])) {
 348             // <f60858.1><d62023> Return identifier if lexeme is a keyword in a
 349             // greater CORBA level; collect attributes indicating future keyword/
 350             // identifier collision and deprecations.
 351 
 352             // Init is really a funny case.  I don't want to mark it as
 353             // a keyword collision in the 2.2 case, since it was only
 354             // defined to be a keyword briefly in 2.3.
 355             if (i == Token.Init) {
 356                 if (level == CORBA_LEVEL_23)
 357                     deprecated = true ;
 358                 else
 359                     break ;
 360             }
 361 
 362             if (i > lastKeyword) {
 363                 collision[0] |= escapedOK; // escapedOK true iff not preprocessing
 364                 break ;
 365             }
 366 
 367             if (string.equals ("TRUE") || string.equals ("FALSE"))
 368                 return new Token (Token.BooleanLiteral, string) ;
 369             else
 370                 return new Token (i, deprecated);
 371         } else if (string.equalsIgnoreCase (Token.Keywords[i])) {
 372             // <d62023> PU!  This will go away in a future release, because
 373             // case-insensitive keyword checking will be standard.  For now,
 374             // indicate that a keyword collision has occurred.
 375             collision[0] |= true;
 376             break;
 377         }
 378     } // for i <= lastKeyword
 379 
 380     return null ;
 381   } // makeKeywordToken
 382 
 383   // Keywords
 384   ///////////////
 385   // Symbols
 386 
 387   static final int
 388       FirstSymbol = 100,
 389       LastSymbol  = 199;
 390 
 391   static final String [] Symbols = {
 392       ";",  "{",  "}",  ":", ",", "=", "+",  "-",
 393       "(",  ")",  "<",  ">", "[", "]", "'",  "\"",
 394       "\\", "|",  "^",  "&", "*", "/", "%",  "~",
 395       "::", "<<", ">>", ".", "#", "!", "==", "!=",
 396       ">=", "<=", "||", "&&"};
 397 
 398   // Symbols
 399   ///////////////
 400   // Literals
 401 
 402   static final int
 403       FirstLiteral = 200,
 404       LastLiteral  = 299;
 405 
 406   static final String [] Literals = {
 407       Util.getMessage ("Token.boolLit"),
 408       Util.getMessage ("Token.charLit"),
 409       Util.getMessage ("Token.intLit"),
 410       Util.getMessage ("Token.floatLit"),
 411       Util.getMessage ("Token.stringLit"),
 412       Util.getMessage ("Token.literal")};
 413 
 414   // Literals
 415   ///////////////
 416   // Directives
 417 
 418   /**
 419    * Determine whether this token is a preprocessor directive.
 420    * @return true iff this token is a preprocessor directive.
 421    **/
 422   boolean isDirective ()
 423   {
 424     return type >= FirstDirective && type <= LastDirective;
 425   } // isDirective
 426 
 427   static final int
 428       FirstDirective = 300,
 429       LastDirective  = 399;
 430 
 431   static final String [] Directives = {
 432       "define", "undef",  "if",
 433       "ifdef",  "ifndef", "else",
 434       "elif",   "include","endif",
 435       "line",   "error",  "pragma",
 436       ""};
 437 
 438   // Directives
 439   ///////////////
 440   // Specials
 441 
 442   static final int
 443       FirstSpecial = 400,
 444       LastSpecial  = 499;
 445 
 446   static final String [] Special = {
 447       "defined"};
 448 
 449   // Specials
 450   ///////////////
 451 
 452   /**
 453    * Constructor.
 454    * @return a Token of the supplied type.
 455    **/
 456   Token (int tokenType)
 457   {
 458     type = tokenType;
 459   } // ctor
 460 
 461   // <d62023>
 462   /**
 463    * Constructor.
 464    * @return a Token having the supplied attributes.
 465    **/
 466   Token (int tokenType, boolean deprecated)
 467   {
 468     this.type = tokenType;
 469     this.isDeprecated = deprecated;
 470   } // ctor
 471 
 472   /**
 473    * Constructor.
 474    * @return a Token having the supplied attributes.
 475    **/
 476   Token (int tokenType, String tokenName)
 477   {
 478     type = tokenType;
 479     name = tokenName;
 480   } // ctor
 481 
 482   /**
 483    * Constructor.
 484    * @return a Token having the supplied attribtues.
 485    *  having
 486    **/
 487   Token (int tokenType, String tokenName, boolean isWide)
 488   {
 489     this (tokenType, tokenName);
 490     this.isWide = isWide;
 491   } // ctor
 492 
 493 
 494   // <d62023>
 495   /**
 496    * Constructor.
 497    * @return a Token having the supplied attributes.
 498    **/
 499   Token (int tokenType, String tokenName, boolean escaped,
 500       boolean collision, boolean deprecated)
 501   {
 502     this (tokenType, tokenName);
 503     this.isEscaped = escaped;
 504     this.collidesWithKeyword = collision;
 505     this.isDeprecated = deprecated;
 506   } // ctor
 507 
 508   // <f46082.40> Remove keyword identifiers.
 509   ///**
 510   // * Constructor.
 511   // * @return a Token having the supplied attributes.
 512   // **/
 513   //Token (int tokenType, int tokenSubType, String tokenName)
 514   //{
 515   //  type    = tokenType;
 516   //  subType = tokenSubType;
 517   //  name    = tokenName;
 518   //} // ctor
 519 
 520   /**
 521    * Get the String representation of this Token.
 522    * @return a String containing representation of this Token.
 523    **/
 524   public String toString ()
 525   {
 526     if (type == Identifier)
 527       return name;
 528     if (type == MacroIdentifier)
 529       return name + '(';
 530     return Token.toString (type);
 531   } // toString
 532 
 533   /**
 534    * Get the String representation of a supplied Token type.
 535    * @return A String containing the name of the supplied Token type.
 536    **/
 537   static String toString (int type)
 538   {
 539     if (type <= LastKeyword)
 540       return Keywords[type];
 541     // <f46082.40> Remove keyword identifiers.
 542     //if ( (type >= FirstKeywordIdentifier) && (type <= LastKeywordIdentifier) )
 543     //  return KeywordIdentifiers[ type - FirstKeywordIdentifier ];
 544     if (type == Identifier || type == MacroIdentifier)
 545       return Util.getMessage ("Token.identifier");
 546     if (type <= LastSymbol)
 547       return Symbols[type - FirstSymbol];
 548     if (type <= LastLiteral)
 549       return Literals[type - FirstLiteral];
 550     if (type <= LastDirective)
 551       return Directives[type - FirstDirective];
 552     if (type <= LastSpecial)
 553       return Special[type - FirstSpecial];
 554     if (type == EOF)
 555       return Util.getMessage ("Token.endOfFile");
 556     return Util.getMessage ("Token.unknown");
 557   } // toString
 558 
 559   ///////////////
 560   // Accessors and Predicates
 561 
 562   /**
 563    * Determine whether this token equals a supplied token.
 564    * @return true iff the types and names of this and the supplied
 565    * Token are equal.
 566    **/
 567   boolean equals (Token that)
 568   {
 569     if (this.type == that.type)
 570       if (this.name == null)
 571         return that.name == null;
 572       else
 573         return this.name.equals (that.name);
 574     return false;
 575   } // equals
 576 
 577   /**
 578    * Determine whether the this token is of a supplied type.
 579    * @return true iff the type of this Token equals that supplied.
 580    **/
 581   boolean equals (int type)
 582   {
 583     return this.type == type;
 584   } // equals
 585 
 586   /**
 587    * Determine whether this identifier has the supplied name.
 588    * @return true iff this Token is an identifier having the supplied name.
 589    **/
 590   boolean equals (String name)
 591   {
 592     return (this.type == Identifier && this.name.equals (name));
 593   } // equals
 594 
 595   // Although isEscaped is an independent attribute, it may be true only
 596   // when type is Identifer.
 597   /**
 598    * Accessor.
 599    * @return true iff this token is an escaped identifier.
 600    **/
 601   public boolean isEscaped ()
 602   {
 603     return type == Identifier && isEscaped;
 604   } // isEscaped
 605 
 606   // <d62023>
 607   /**
 608    * Accessor.
 609    * @return true iff this token is an identifier having a name matching
 610    * a keyword in a version of CORBA greater than the specified CORBA level,
 611    * or iff it matches a keyword in letter, but note case.
 612    **/
 613   public boolean collidesWithKeyword ()
 614   {
 615     return collidesWithKeyword;
 616   } // collidesWithKeyword
 617 
 618   // <d62023> Storing deprecation information in a token seems a natural
 619   // means to notify the parser about deprecated types.
 620   /**
 621    * Accessor.
 622    * @return true iff this token is a deprecated lexeme or lexical type with
 623    * respect to the specified CORBA level.
 624    **/
 625   public boolean isDeprecated ()
 626   {
 627     return isDeprecated;
 628   }
 629   // isDeprecated
 630 
 631   public boolean isWide()
 632   {
 633       return isWide ;
 634   }
 635 
 636   // <d59166><d62023> It's more efficient if Scanner determines this attribute.
 637   /**
 638    * Determine whether this token collides with an IDL keyword.
 639    **/
 640   //public boolean collidesWithKeyword ()
 641   //{
 642   //  if (name != null && type == Identifier && !isEscaped)
 643   //  {
 644   //    String lcName = name.toLowerCase ();
 645   //    for (int i = FirstKeyword; i <= LastKeyword; ++i)
 646   //      if (lcName.equals (Token.Keywords [i].toLowerCase ()))
 647   //        return true;
 648   //  }
 649   //  return false;
 650   //} // collidesWithKeyword
 651 
 652   // Accessors and Predicates
 653   ///////////////
 654 
 655   /**
 656    * Code identifying the lexical class to which this token belongs, e.g.,
 657    * Keyword, Identifier, ...
 658    **/
 659   int type;
 660   /**
 661    * Lexeme extracted from the source for this token.
 662    **/
 663   String name = null;
 664   /**
 665    * Source comment associated with this token.
 666    **/
 667   Comment comment = null;
 668   /**
 669    * True iff this token is an escaped identifier.
 670    **/
 671   boolean isEscaped = false; // <d59165>
 672   /**
 673    * True iff this token is an identifier that is known to be a keyword
 674    * in another version of CORBA or matches a keyword in letter, but not case.
 675    **/
 676   boolean collidesWithKeyword = false;  // <d62023>
 677   /**
 678    * True iff this token is deprecated.
 679    **/
 680   boolean isDeprecated = false;  // <d62023>
 681   // <f46082.40> Remove keyword identifier implementation.
 682   ///**
 683   // * Non-zero only when type = [Macro]Identifier
 684   // **/
 685   //int subType = 0;
 686 
 687   boolean isWide = false ;  // Only for string and char literals: indicates that this is
 688                             // a wide string or char.
 689 } // class Token