1 /*
   2  * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
   3  */
   4 /*
   5  * Licensed to the Apache Software Foundation (ASF) under one or more
   6  * contributor license agreements.  See the NOTICE file distributed with
   7  * this work for additional information regarding copyright ownership.
   8  * The ASF licenses this file to You under the Apache License, Version 2.0
   9  * (the "License"); you may not use this file except in compliance with
  10  * the License.  You may obtain a copy of the License at
  11  *
  12  *      http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 
  21 package com.sun.org.apache.xml.internal.dtm.ref;
  22 
  23 import com.sun.org.apache.xalan.internal.utils.ObjectFactory;
  24 import com.sun.org.apache.xerces.internal.parsers.SAXParser;
  25 import com.sun.org.apache.xml.internal.res.XMLErrorResources;
  26 import com.sun.org.apache.xml.internal.res.XMLMessages;
  27 import java.io.IOException;
  28 import java.lang.reflect.Constructor;
  29 import java.lang.reflect.Method;
  30 import org.xml.sax.InputSource;
  31 import org.xml.sax.SAXException;
  32 import org.xml.sax.XMLReader;
  33 
  34 
/** <p>IncrementalSAXSource_Xerces takes advantage of the fact that the Xerces1
 * incremental mode is already a coroutine of sorts, and just wraps our
 * IncrementalSAXSource API around it.</p>
 *
 * <p>Usage example: See _main().</p>
 *
 * <p>Status: Passes the simple _main() unit-test. NEEDS JAVADOC.</p>
 *
 * @LastModified: Oct 2017
 */
  45 public class IncrementalSAXSource_Xerces
  46   implements IncrementalSAXSource
  47 {
  48   //
  49   // Reflection. To allow this to compile with both Xerces1 and Xerces2, which
  50   // require very different methods and objects, we need to avoid static
  51   // references to those APIs. So until Xerces2 is pervasive and we're willing
  52   // to make it a prerequisite, we will rely upon relection.
  53   //
  54   Method fParseSomeSetup=null; // Xerces1 method
  55   Method fParseSome=null; // Xerces1 method
  56   Object fPullParserConfig=null; // Xerces2 pull control object
  57   Method fConfigSetInput=null; // Xerces2 method
  58   Method fConfigParse=null; // Xerces2 method
  59   Method fSetInputSource=null; // Xerces2 pull control method
  60   Constructor<?> fConfigInputSourceCtor=null; // Xerces2 initialization method
  61   Method fConfigSetByteStream=null; // Xerces2 initialization method
  62   Method fConfigSetCharStream=null; // Xerces2 initialization method
  63   Method fConfigSetEncoding=null; // Xerces2 initialization method
  64   Method fReset=null; // Both Xerces1 and Xerces2, but diff. signatures
  65 
  66   //
  67   // Data
  68   //
  69   SAXParser fIncrementalParser;
  70   private boolean fParseInProgress=false;
  71 
  72   //
  73   // Constructors
  74   //
  75 
  76   /** Create a IncrementalSAXSource_Xerces, and create a SAXParser
  77    * to go with it. Xerces2 incremental parsing is only supported if
  78    * this constructor is used, due to limitations in the Xerces2 API (as of
  79    * Beta 3). If you don't like that restriction, tell the Xerces folks that
  80    * there should be a simpler way to request incremental SAX parsing.
  81    * */
  82   public IncrementalSAXSource_Xerces()
  83                 throws NoSuchMethodException
  84         {
  85                 try
  86                 {
  87                         // This should be cleaned up and the use of reflection
  88                         // removed - see JDK-8129880
  89 
  90                         // Xerces-2 incremental parsing support (as of Beta 3)
  91                         // ContentHandlers still get set on fIncrementalParser (to get
  92                         // conversion from XNI events to SAX events), but
  93                         // _control_ for incremental parsing must be exercised via the config.
  94                         //
  95                         // At this time there's no way to read the existing config, only
  96                         // to assert a new one... and only when creating a brand-new parser.
  97                         //
  98                         // Reflection is used to allow us to continue to compile against
  99                         // Xerces1. If/when we can abandon the older versions of the parser,
 100                         // this will simplify significantly.
 101 
 102                         // If we can't get the magic constructor, no need to look further.
 103                         Class<?> xniConfigClass=ObjectFactory.findProviderClass(
 104                             "com.sun.org.apache.xerces.internal.xni.parser.XMLParserConfiguration",
 105                             true);
 106                         Class<?>[] args1={xniConfigClass};
 107                         Constructor<?> ctor=SAXParser.class.getConstructor(args1);
 108 
 109                         // Build the parser configuration object. StandardParserConfiguration
 110                         // happens to implement XMLPullParserConfiguration, which is the API
 111                         // we're going to want to use.
 112                         Class<?> xniStdConfigClass=ObjectFactory.findProviderClass(
 113                             "com.sun.org.apache.xerces.internal.parsers.StandardParserConfiguration",
 114                             true);
 115                         fPullParserConfig=xniStdConfigClass.getConstructor().newInstance();
 116                         Object[] args2={fPullParserConfig};
 117                         fIncrementalParser = (SAXParser)ctor.newInstance(args2);
 118 
 119                         // Preload all the needed the configuration methods... I want to know they're
 120                         // all here before we commit to trying to use them, just in case the
 121                         // API changes again.
 122                         Class<?> fXniInputSourceClass=ObjectFactory.findProviderClass(
 123                             "com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource",
 124                             true);
 125                         Class<?>[] args3={fXniInputSourceClass};
 126                         fConfigSetInput=xniStdConfigClass.getMethod("setInputSource",args3);
 127 
 128                         Class<?>[] args4={String.class,String.class,String.class};
 129                         fConfigInputSourceCtor=fXniInputSourceClass.getConstructor(args4);
 130                         Class<?>[] args5={java.io.InputStream.class};
 131                         fConfigSetByteStream=fXniInputSourceClass.getMethod("setByteStream",args5);
 132                         Class<?>[] args6={java.io.Reader.class};
 133                         fConfigSetCharStream=fXniInputSourceClass.getMethod("setCharacterStream",args6);
 134                         Class<?>[] args7={String.class};
 135                         fConfigSetEncoding=fXniInputSourceClass.getMethod("setEncoding",args7);
 136 
 137                         Class<?>[] argsb={Boolean.TYPE};
 138                         fConfigParse=xniStdConfigClass.getMethod("parse",argsb);
 139                         Class<?>[] noargs=new Class<?>[0];
 140                         fReset=fIncrementalParser.getClass().getMethod("reset",noargs);
 141                 }
 142                 catch(Exception e)
 143                 {
 144             // Fallback if this fails (implemented in createIncrementalSAXSource) is
 145                         // to attempt Xerces-1 incremental setup. Can't do tail-call in
 146                         // constructor, so create new, copy Xerces-1 initialization,
 147                         // then throw it away... Ugh.
 148                         IncrementalSAXSource_Xerces dummy=new IncrementalSAXSource_Xerces(new SAXParser());
 149                         this.fParseSomeSetup=dummy.fParseSomeSetup;
 150                         this.fParseSome=dummy.fParseSome;
 151                         this.fIncrementalParser=dummy.fIncrementalParser;
 152                 }
 153   }
 154 
 155   /** Create a IncrementalSAXSource_Xerces wrapped around
 156    * an existing SAXParser. Currently this works only for recent
 157    * releases of Xerces-1.  Xerces-2 incremental is currently possible
 158    * only if we are allowed to create the parser instance, due to
 159    * limitations in the API exposed by Xerces-2 Beta 3; see the
 160    * no-args constructor for that code.
 161    *
 162    * @exception if the SAXParser class doesn't support the Xerces
 163    * incremental parse operations. In that case, caller should
 164    * fall back upon the IncrementalSAXSource_Filter approach.
 165    * */
 166   public IncrementalSAXSource_Xerces(SAXParser parser)
 167     throws NoSuchMethodException
 168   {
 169                 // Reflection is used to allow us to compile against
 170                 // Xerces2. If/when we can abandon the older versions of the parser,
 171                 // this constructor will simply have to fail until/unless the
 172                 // Xerces2 incremental support is made available on previously
 173                 // constructed SAXParser instances.
 174     fIncrementalParser=parser;
 175                 Class<?> me=parser.getClass();
 176     Class<?>[] parms={InputSource.class};
 177     fParseSomeSetup=me.getMethod("parseSomeSetup",parms);
 178     parms=new Class<?>[0];
 179     fParseSome=me.getMethod("parseSome",parms);
 180     // Fallback if this fails (implemented in createIncrementalSAXSource) is
 181     // to use IncrementalSAXSource_Filter rather than Xerces-specific code.
 182   }
 183 
 184   //
 185   // Factories
 186   //
 187   static public IncrementalSAXSource createIncrementalSAXSource()
 188         {
 189                 try
 190                 {
 191                         return new IncrementalSAXSource_Xerces();
 192                 }
 193                 catch(NoSuchMethodException e)
 194                 {
 195                         // Xerces version mismatch; neither Xerces1 nor Xerces2 succeeded.
 196                         // Fall back on filtering solution.
 197                         IncrementalSAXSource_Filter iss=new IncrementalSAXSource_Filter();
 198                         iss.setXMLReader(new SAXParser());
 199                         return iss;
 200                 }
 201   }
 202 
 203   static public IncrementalSAXSource
 204   createIncrementalSAXSource(SAXParser parser) {
 205                 try
 206                 {
 207                         return new IncrementalSAXSource_Xerces(parser);
 208                 }
 209                 catch(NoSuchMethodException e)
 210                 {
 211                         // Xerces version mismatch; neither Xerces1 nor Xerces2 succeeded.
 212                         // Fall back on filtering solution.
 213                         IncrementalSAXSource_Filter iss=new IncrementalSAXSource_Filter();
 214                         iss.setXMLReader(parser);
 215                         return iss;
 216                 }
 217   }
 218 
 219   //
 220   // Public methods
 221   //
 222 
 223   // Register handler directly with the incremental parser
 224   public void setContentHandler(org.xml.sax.ContentHandler handler)
 225   {
 226     // Typecast required in Xerces2; SAXParser doesn't inheret XMLReader
 227     // %OPT% Cast at asignment?
 228     ((XMLReader)fIncrementalParser).setContentHandler(handler);
 229   }
 230 
 231   // Register handler directly with the incremental parser
 232   public void setLexicalHandler(org.xml.sax.ext.LexicalHandler handler)
 233   {
 234     // Not supported by all SAX2 parsers but should work in Xerces:
 235     try
 236     {
 237       // Typecast required in Xerces2; SAXParser doesn't inheret XMLReader
 238       // %OPT% Cast at asignment?
 239       ((XMLReader)fIncrementalParser).setProperty("http://xml.org/sax/properties/lexical-handler",
 240                                      handler);
 241     }
 242     catch(org.xml.sax.SAXNotRecognizedException e)
 243     {
 244       // Nothing we can do about it
 245     }
 246     catch(org.xml.sax.SAXNotSupportedException e)
 247     {
 248       // Nothing we can do about it
 249     }
 250   }
 251 
 252   // Register handler directly with the incremental parser
 253   public void setDTDHandler(org.xml.sax.DTDHandler handler)
 254   {
 255     // Typecast required in Xerces2; SAXParser doesn't inheret XMLReader
 256     // %OPT% Cast at asignment?
 257     ((XMLReader)fIncrementalParser).setDTDHandler(handler);
 258   }
 259 
 260   //================================================================
 261   /** startParse() is a simple API which tells the IncrementalSAXSource
 262    * to begin reading a document.
 263    *
 264    * @throws SAXException is parse thread is already in progress
 265    * or parsing can not be started.
 266    * */
 267   public void startParse(InputSource source) throws SAXException
 268   {
 269     if (fIncrementalParser==null)
 270       throw new SAXException(XMLMessages.createXMLMessage(XMLErrorResources.ER_STARTPARSE_NEEDS_SAXPARSER, null)); //"startParse needs a non-null SAXParser.");
 271     if (fParseInProgress)
 272       throw new SAXException(XMLMessages.createXMLMessage(XMLErrorResources.ER_STARTPARSE_WHILE_PARSING, null)); //"startParse may not be called while parsing.");
 273 
 274     boolean ok=false;
 275 
 276     try
 277     {
 278       ok = parseSomeSetup(source);
 279     }
 280     catch(Exception ex)
 281     {
 282       throw new SAXException(ex);
 283     }
 284 
 285     if(!ok)
 286       throw new SAXException(XMLMessages.createXMLMessage(XMLErrorResources.ER_COULD_NOT_INIT_PARSER, null)); //"could not initialize parser with");
 287   }
 288 
 289 
 290   /** deliverMoreNodes() is a simple API which tells the coroutine
 291    * parser that we need more nodes.  This is intended to be called
 292    * from one of our partner routines, and serves to encapsulate the
 293    * details of how incremental parsing has been achieved.
 294    *
 295    * @param parsemore If true, tells the incremental parser to generate
 296    * another chunk of output. If false, tells the parser that we're
 297    * satisfied and it can terminate parsing of this document.
 298    * @return Boolean.TRUE if the CoroutineParser believes more data may be available
 299    * for further parsing. Boolean.FALSE if parsing ran to completion.
 300    * Exception if the parser objected for some reason.
 301    * */
 302   public Object deliverMoreNodes (boolean parsemore)
 303   {
 304     if(!parsemore)
 305     {
 306       fParseInProgress=false;
 307       return Boolean.FALSE;
 308     }
 309 
 310     Object arg;
 311     try {
 312       boolean keepgoing = parseSome();
 313       arg = keepgoing ? Boolean.TRUE : Boolean.FALSE;
 314     } catch (SAXException ex) {
 315       arg = ex;
 316     } catch (IOException ex) {
 317       arg = ex;
 318     } catch (Exception ex) {
 319       arg = new SAXException(ex);
 320     }
 321     return arg;
 322   }
 323 
 324         // Private methods -- conveniences to hide the reflection details
 325         private boolean parseSomeSetup(InputSource source)
 326                 throws SAXException, IOException, IllegalAccessException,
 327                                          java.lang.reflect.InvocationTargetException,
 328                                          java.lang.InstantiationException
 329         {
 330                 if(fConfigSetInput!=null)
 331                 {
 332                         // Obtain input from SAX inputSource object, construct XNI version of
 333                         // that object. Logic adapted from Xerces2.
 334                         Object[] parms1={source.getPublicId(),source.getSystemId(),null};
 335                         Object xmlsource=fConfigInputSourceCtor.newInstance(parms1);
 336                         Object[] parmsa={source.getByteStream()};
 337                         fConfigSetByteStream.invoke(xmlsource,parmsa);
 338                         parmsa[0]=source.getCharacterStream();
 339                         fConfigSetCharStream.invoke(xmlsource,parmsa);
 340                         parmsa[0]=source.getEncoding();
 341                         fConfigSetEncoding.invoke(xmlsource,parmsa);
 342 
 343                         // Bugzilla5272 patch suggested by Sandy Gao.
 344                         // Has to be reflection to run with Xerces2
 345                         // after compilation against Xerces1. or vice
 346                         // versa, due to return type mismatches.
 347                         Object[] noparms=new Object[0];
 348                         fReset.invoke(fIncrementalParser,noparms);
 349 
 350                         parmsa[0]=xmlsource;
 351                         fConfigSetInput.invoke(fPullParserConfig,parmsa);
 352 
 353                         // %REVIEW% Do first pull. Should we instead just return true?
 354                         return parseSome();
 355                 }
 356                 else
 357                 {
 358                         Object[] parm={source};
 359                         Object ret=fParseSomeSetup.invoke(fIncrementalParser,parm);
 360                         return ((Boolean)ret).booleanValue();
 361                 }
 362         }
 363 //  Would null work???
 364     private static final Object[] noparms=new Object[0];
 365     private static final Object[] parmsfalse={Boolean.FALSE};
 366     private boolean parseSome()
 367                 throws SAXException, IOException, IllegalAccessException,
 368                                          java.lang.reflect.InvocationTargetException
 369         {
 370                 // Take next parsing step, return false iff parsing complete:
 371                 if(fConfigSetInput!=null)
 372                 {
 373                         Object ret=(Boolean)(fConfigParse.invoke(fPullParserConfig,parmsfalse));
 374                         return ((Boolean)ret).booleanValue();
 375                 }
 376                 else
 377                 {
 378                         Object ret=fParseSome.invoke(fIncrementalParser,noparms);
 379                         return ((Boolean)ret).booleanValue();
 380                 }
 381         }
 382 
 383 
 384   //================================================================
 385   /** Simple unit test. Attempt coroutine parsing of document indicated
 386    * by first argument (as a URI), report progress.
 387    */
 388   @Deprecated
 389   public static void _main(String args[])
 390   {
 391     System.out.println("Starting...");
 392 
 393     CoroutineManager co = new CoroutineManager();
 394     int appCoroutineID = co.co_joinCoroutineSet(-1);
 395     if (appCoroutineID == -1)
 396     {
 397       System.out.println("ERROR: Couldn't allocate coroutine number.\n");
 398       return;
 399     }
 400     IncrementalSAXSource parser=
 401       createIncrementalSAXSource();
 402 
 403     // Use a serializer as our sample output
 404     com.sun.org.apache.xml.internal.serialize.XMLSerializer trace;
 405     trace=new com.sun.org.apache.xml.internal.serialize.XMLSerializer(System.out,null);
 406     parser.setContentHandler(trace);
 407     parser.setLexicalHandler(trace);
 408 
 409     // Tell coroutine to begin parsing, run while parsing is in progress
 410 
 411     for(int arg=0;arg<args.length;++arg)
 412     {
 413       try
 414       {
 415         InputSource source = new InputSource(args[arg]);
 416         Object result=null;
 417         boolean more=true;
 418         parser.startParse(source);
 419         for(result = parser.deliverMoreNodes(more);
 420             result==Boolean.TRUE;
 421             result = parser.deliverMoreNodes(more))
 422         {
 423           System.out.println("\nSome parsing successful, trying more.\n");
 424 
 425           // Special test: Terminate parsing early.
 426           if(arg+1<args.length && "!".equals(args[arg+1]))
 427           {
 428             ++arg;
 429             more=false;
 430           }
 431 
 432         }
 433 
 434         if (result instanceof Boolean && ((Boolean)result)==Boolean.FALSE)
 435         {
 436           System.out.println("\nParser ended (EOF or on request).\n");
 437         }
 438         else if (result == null) {
 439           System.out.println("\nUNEXPECTED: Parser says shut down prematurely.\n");
 440         }
 441         else if (result instanceof Exception) {
 442           throw new com.sun.org.apache.xml.internal.utils.WrappedRuntimeException((Exception)result);
 443           //          System.out.println("\nParser threw exception:");
 444           //          ((Exception)result).printStackTrace();
 445         }
 446 
 447       }
 448 
 449       catch(SAXException e)
 450       {
 451         e.printStackTrace();
 452       }
 453     }
 454 
 455   }
 456 
 457 
 458 } // class IncrementalSAXSource_Xerces