1 /* 2 * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. 3 */ 4 /* 5 * Licensed to the Apache Software Foundation (ASF) under one or more 6 * contributor license agreements. See the NOTICE file distributed with 7 * this work for additional information regarding copyright ownership. 8 * The ASF licenses this file to You under the Apache License, Version 2.0 9 * (the "License"); you may not use this file except in compliance with 10 * the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21 package com.sun.org.apache.xml.internal.dtm.ref; 22 23 import com.sun.org.apache.xalan.internal.utils.ObjectFactory; 24 import com.sun.org.apache.xerces.internal.parsers.SAXParser; 25 import com.sun.org.apache.xml.internal.res.XMLErrorResources; 26 import com.sun.org.apache.xml.internal.res.XMLMessages; 27 import java.io.IOException; 28 import java.lang.reflect.Constructor; 29 import java.lang.reflect.Method; 30 import org.xml.sax.InputSource; 31 import org.xml.sax.SAXException; 32 import org.xml.sax.XMLReader; 33 34 35 /** <p>IncrementalSAXSource_Xerces takes advantage of the fact that Xerces1 36 * incremental mode is already a coroutine of sorts, and just wraps our 37 * IncrementalSAXSource API around it.</p> 38 * 39 * <p>Usage example: See main().</p> 40 * 41 * <p>Status: Passes simple main() unit-test. NEEDS JAVADOC.</p> 42 * 43 * @LastModified: Oct 2017 44 */ 45 public class IncrementalSAXSource_Xerces 46 implements IncrementalSAXSource 47 { 48 // 49 // Reflection. To allow this to compile with both Xerces1 and Xerces2, which 50 // require very different methods and objects, we need to avoid static 51 // references to those APIs. So until Xerces2 is pervasive and we're willing 52 // to make it a prerequisite, we will rely upon relection. 53 // 54 Method fParseSomeSetup=null; // Xerces1 method 55 Method fParseSome=null; // Xerces1 method 56 Object fPullParserConfig=null; // Xerces2 pull control object 57 Method fConfigSetInput=null; // Xerces2 method 58 Method fConfigParse=null; // Xerces2 method 59 Method fSetInputSource=null; // Xerces2 pull control method 60 Constructor<?> fConfigInputSourceCtor=null; // Xerces2 initialization method 61 Method fConfigSetByteStream=null; // Xerces2 initialization method 62 Method fConfigSetCharStream=null; // Xerces2 initialization method 63 Method fConfigSetEncoding=null; // Xerces2 initialization method 64 Method fReset=null; // Both Xerces1 and Xerces2, but diff. signatures 65 66 // 67 // Data 68 // 69 SAXParser fIncrementalParser; 70 private boolean fParseInProgress=false; 71 72 // 73 // Constructors 74 // 75 76 /** Create a IncrementalSAXSource_Xerces, and create a SAXParser 77 * to go with it. Xerces2 incremental parsing is only supported if 78 * this constructor is used, due to limitations in the Xerces2 API (as of 79 * Beta 3). If you don't like that restriction, tell the Xerces folks that 80 * there should be a simpler way to request incremental SAX parsing. 81 * */ 82 public IncrementalSAXSource_Xerces() 83 throws NoSuchMethodException 84 { 85 try 86 { 87 // This should be cleaned up and the use of reflection 88 // removed - see JDK-8129880 89 90 // Xerces-2 incremental parsing support (as of Beta 3) 91 // ContentHandlers still get set on fIncrementalParser (to get 92 // conversion from XNI events to SAX events), but 93 // _control_ for incremental parsing must be exercised via the config. 94 // 95 // At this time there's no way to read the existing config, only 96 // to assert a new one... and only when creating a brand-new parser. 97 // 98 // Reflection is used to allow us to continue to compile against 99 // Xerces1. If/when we can abandon the older versions of the parser, 100 // this will simplify significantly. 101 102 // If we can't get the magic constructor, no need to look further. 103 Class<?> xniConfigClass=ObjectFactory.findProviderClass( 104 "com.sun.org.apache.xerces.internal.xni.parser.XMLParserConfiguration", 105 true); 106 Class<?>[] args1={xniConfigClass}; 107 Constructor<?> ctor=SAXParser.class.getConstructor(args1); 108 109 // Build the parser configuration object. StandardParserConfiguration 110 // happens to implement XMLPullParserConfiguration, which is the API 111 // we're going to want to use. 112 Class<?> xniStdConfigClass=ObjectFactory.findProviderClass( 113 "com.sun.org.apache.xerces.internal.parsers.StandardParserConfiguration", 114 true); 115 fPullParserConfig=xniStdConfigClass.getConstructor().newInstance(); 116 Object[] args2={fPullParserConfig}; 117 fIncrementalParser = (SAXParser)ctor.newInstance(args2); 118 119 // Preload all the needed the configuration methods... I want to know they're 120 // all here before we commit to trying to use them, just in case the 121 // API changes again. 122 Class<?> fXniInputSourceClass=ObjectFactory.findProviderClass( 123 "com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource", 124 true); 125 Class<?>[] args3={fXniInputSourceClass}; 126 fConfigSetInput=xniStdConfigClass.getMethod("setInputSource",args3); 127 128 Class<?>[] args4={String.class,String.class,String.class}; 129 fConfigInputSourceCtor=fXniInputSourceClass.getConstructor(args4); 130 Class<?>[] args5={java.io.InputStream.class}; 131 fConfigSetByteStream=fXniInputSourceClass.getMethod("setByteStream",args5); 132 Class<?>[] args6={java.io.Reader.class}; 133 fConfigSetCharStream=fXniInputSourceClass.getMethod("setCharacterStream",args6); 134 Class<?>[] args7={String.class}; 135 fConfigSetEncoding=fXniInputSourceClass.getMethod("setEncoding",args7); 136 137 Class<?>[] argsb={Boolean.TYPE}; 138 fConfigParse=xniStdConfigClass.getMethod("parse",argsb); 139 Class<?>[] noargs=new Class<?>[0]; 140 fReset=fIncrementalParser.getClass().getMethod("reset",noargs); 141 } 142 catch(Exception e) 143 { 144 // Fallback if this fails (implemented in createIncrementalSAXSource) is 145 // to attempt Xerces-1 incremental setup. Can't do tail-call in 146 // constructor, so create new, copy Xerces-1 initialization, 147 // then throw it away... Ugh. 148 IncrementalSAXSource_Xerces dummy=new IncrementalSAXSource_Xerces(new SAXParser()); 149 this.fParseSomeSetup=dummy.fParseSomeSetup; 150 this.fParseSome=dummy.fParseSome; 151 this.fIncrementalParser=dummy.fIncrementalParser; 152 } 153 } 154 155 /** Create a IncrementalSAXSource_Xerces wrapped around 156 * an existing SAXParser. Currently this works only for recent 157 * releases of Xerces-1. Xerces-2 incremental is currently possible 158 * only if we are allowed to create the parser instance, due to 159 * limitations in the API exposed by Xerces-2 Beta 3; see the 160 * no-args constructor for that code. 161 * 162 * @exception if the SAXParser class doesn't support the Xerces 163 * incremental parse operations. In that case, caller should 164 * fall back upon the IncrementalSAXSource_Filter approach. 165 * */ 166 public IncrementalSAXSource_Xerces(SAXParser parser) 167 throws NoSuchMethodException 168 { 169 // Reflection is used to allow us to compile against 170 // Xerces2. If/when we can abandon the older versions of the parser, 171 // this constructor will simply have to fail until/unless the 172 // Xerces2 incremental support is made available on previously 173 // constructed SAXParser instances. 174 fIncrementalParser=parser; 175 Class<?> me=parser.getClass(); 176 Class<?>[] parms={InputSource.class}; 177 fParseSomeSetup=me.getMethod("parseSomeSetup",parms); 178 parms=new Class<?>[0]; 179 fParseSome=me.getMethod("parseSome",parms); 180 // Fallback if this fails (implemented in createIncrementalSAXSource) is 181 // to use IncrementalSAXSource_Filter rather than Xerces-specific code. 182 } 183 184 // 185 // Factories 186 // 187 static public IncrementalSAXSource createIncrementalSAXSource() 188 { 189 try 190 { 191 return new IncrementalSAXSource_Xerces(); 192 } 193 catch(NoSuchMethodException e) 194 { 195 // Xerces version mismatch; neither Xerces1 nor Xerces2 succeeded. 196 // Fall back on filtering solution. 197 IncrementalSAXSource_Filter iss=new IncrementalSAXSource_Filter(); 198 iss.setXMLReader(new SAXParser()); 199 return iss; 200 } 201 } 202 203 static public IncrementalSAXSource 204 createIncrementalSAXSource(SAXParser parser) { 205 try 206 { 207 return new IncrementalSAXSource_Xerces(parser); 208 } 209 catch(NoSuchMethodException e) 210 { 211 // Xerces version mismatch; neither Xerces1 nor Xerces2 succeeded. 212 // Fall back on filtering solution. 213 IncrementalSAXSource_Filter iss=new IncrementalSAXSource_Filter(); 214 iss.setXMLReader(parser); 215 return iss; 216 } 217 } 218 219 // 220 // Public methods 221 // 222 223 // Register handler directly with the incremental parser 224 public void setContentHandler(org.xml.sax.ContentHandler handler) 225 { 226 // Typecast required in Xerces2; SAXParser doesn't inheret XMLReader 227 // %OPT% Cast at asignment? 228 ((XMLReader)fIncrementalParser).setContentHandler(handler); 229 } 230 231 // Register handler directly with the incremental parser 232 public void setLexicalHandler(org.xml.sax.ext.LexicalHandler handler) 233 { 234 // Not supported by all SAX2 parsers but should work in Xerces: 235 try 236 { 237 // Typecast required in Xerces2; SAXParser doesn't inheret XMLReader 238 // %OPT% Cast at asignment? 239 ((XMLReader)fIncrementalParser).setProperty("http://xml.org/sax/properties/lexical-handler", 240 handler); 241 } 242 catch(org.xml.sax.SAXNotRecognizedException e) 243 { 244 // Nothing we can do about it 245 } 246 catch(org.xml.sax.SAXNotSupportedException e) 247 { 248 // Nothing we can do about it 249 } 250 } 251 252 // Register handler directly with the incremental parser 253 public void setDTDHandler(org.xml.sax.DTDHandler handler) 254 { 255 // Typecast required in Xerces2; SAXParser doesn't inheret XMLReader 256 // %OPT% Cast at asignment? 257 ((XMLReader)fIncrementalParser).setDTDHandler(handler); 258 } 259 260 //================================================================ 261 /** startParse() is a simple API which tells the IncrementalSAXSource 262 * to begin reading a document. 263 * 264 * @throws SAXException is parse thread is already in progress 265 * or parsing can not be started. 266 * */ 267 public void startParse(InputSource source) throws SAXException 268 { 269 if (fIncrementalParser==null) 270 throw new SAXException(XMLMessages.createXMLMessage(XMLErrorResources.ER_STARTPARSE_NEEDS_SAXPARSER, null)); //"startParse needs a non-null SAXParser."); 271 if (fParseInProgress) 272 throw new SAXException(XMLMessages.createXMLMessage(XMLErrorResources.ER_STARTPARSE_WHILE_PARSING, null)); //"startParse may not be called while parsing."); 273 274 boolean ok=false; 275 276 try 277 { 278 ok = parseSomeSetup(source); 279 } 280 catch(Exception ex) 281 { 282 throw new SAXException(ex); 283 } 284 285 if(!ok) 286 throw new SAXException(XMLMessages.createXMLMessage(XMLErrorResources.ER_COULD_NOT_INIT_PARSER, null)); //"could not initialize parser with"); 287 } 288 289 290 /** deliverMoreNodes() is a simple API which tells the coroutine 291 * parser that we need more nodes. This is intended to be called 292 * from one of our partner routines, and serves to encapsulate the 293 * details of how incremental parsing has been achieved. 294 * 295 * @param parsemore If true, tells the incremental parser to generate 296 * another chunk of output. If false, tells the parser that we're 297 * satisfied and it can terminate parsing of this document. 298 * @return Boolean.TRUE if the CoroutineParser believes more data may be available 299 * for further parsing. Boolean.FALSE if parsing ran to completion. 300 * Exception if the parser objected for some reason. 301 * */ 302 public Object deliverMoreNodes (boolean parsemore) 303 { 304 if(!parsemore) 305 { 306 fParseInProgress=false; 307 return Boolean.FALSE; 308 } 309 310 Object arg; 311 try { 312 boolean keepgoing = parseSome(); 313 arg = keepgoing ? Boolean.TRUE : Boolean.FALSE; 314 } catch (SAXException ex) { 315 arg = ex; 316 } catch (IOException ex) { 317 arg = ex; 318 } catch (Exception ex) { 319 arg = new SAXException(ex); 320 } 321 return arg; 322 } 323 324 // Private methods -- conveniences to hide the reflection details 325 private boolean parseSomeSetup(InputSource source) 326 throws SAXException, IOException, IllegalAccessException, 327 java.lang.reflect.InvocationTargetException, 328 java.lang.InstantiationException 329 { 330 if(fConfigSetInput!=null) 331 { 332 // Obtain input from SAX inputSource object, construct XNI version of 333 // that object. Logic adapted from Xerces2. 334 Object[] parms1={source.getPublicId(),source.getSystemId(),null}; 335 Object xmlsource=fConfigInputSourceCtor.newInstance(parms1); 336 Object[] parmsa={source.getByteStream()}; 337 fConfigSetByteStream.invoke(xmlsource,parmsa); 338 parmsa[0]=source.getCharacterStream(); 339 fConfigSetCharStream.invoke(xmlsource,parmsa); 340 parmsa[0]=source.getEncoding(); 341 fConfigSetEncoding.invoke(xmlsource,parmsa); 342 343 // Bugzilla5272 patch suggested by Sandy Gao. 344 // Has to be reflection to run with Xerces2 345 // after compilation against Xerces1. or vice 346 // versa, due to return type mismatches. 347 Object[] noparms=new Object[0]; 348 fReset.invoke(fIncrementalParser,noparms); 349 350 parmsa[0]=xmlsource; 351 fConfigSetInput.invoke(fPullParserConfig,parmsa); 352 353 // %REVIEW% Do first pull. Should we instead just return true? 354 return parseSome(); 355 } 356 else 357 { 358 Object[] parm={source}; 359 Object ret=fParseSomeSetup.invoke(fIncrementalParser,parm); 360 return ((Boolean)ret).booleanValue(); 361 } 362 } 363 // Would null work??? 364 private static final Object[] noparms=new Object[0]; 365 private static final Object[] parmsfalse={Boolean.FALSE}; 366 private boolean parseSome() 367 throws SAXException, IOException, IllegalAccessException, 368 java.lang.reflect.InvocationTargetException 369 { 370 // Take next parsing step, return false iff parsing complete: 371 if(fConfigSetInput!=null) 372 { 373 Object ret=(Boolean)(fConfigParse.invoke(fPullParserConfig,parmsfalse)); 374 return ((Boolean)ret).booleanValue(); 375 } 376 else 377 { 378 Object ret=fParseSome.invoke(fIncrementalParser,noparms); 379 return ((Boolean)ret).booleanValue(); 380 } 381 } 382 383 384 //================================================================ 385 /** Simple unit test. Attempt coroutine parsing of document indicated 386 * by first argument (as a URI), report progress. 387 */ 388 @Deprecated 389 public static void _main(String args[]) 390 { 391 System.out.println("Starting..."); 392 393 CoroutineManager co = new CoroutineManager(); 394 int appCoroutineID = co.co_joinCoroutineSet(-1); 395 if (appCoroutineID == -1) 396 { 397 System.out.println("ERROR: Couldn't allocate coroutine number.\n"); 398 return; 399 } 400 IncrementalSAXSource parser= 401 createIncrementalSAXSource(); 402 403 // Use a serializer as our sample output 404 com.sun.org.apache.xml.internal.serialize.XMLSerializer trace; 405 trace=new com.sun.org.apache.xml.internal.serialize.XMLSerializer(System.out,null); 406 parser.setContentHandler(trace); 407 parser.setLexicalHandler(trace); 408 409 // Tell coroutine to begin parsing, run while parsing is in progress 410 411 for(int arg=0;arg<args.length;++arg) 412 { 413 try 414 { 415 InputSource source = new InputSource(args[arg]); 416 Object result=null; 417 boolean more=true; 418 parser.startParse(source); 419 for(result = parser.deliverMoreNodes(more); 420 result==Boolean.TRUE; 421 result = parser.deliverMoreNodes(more)) 422 { 423 System.out.println("\nSome parsing successful, trying more.\n"); 424 425 // Special test: Terminate parsing early. 426 if(arg+1<args.length && "!".equals(args[arg+1])) 427 { 428 ++arg; 429 more=false; 430 } 431 432 } 433 434 if (result instanceof Boolean && ((Boolean)result)==Boolean.FALSE) 435 { 436 System.out.println("\nParser ended (EOF or on request).\n"); 437 } 438 else if (result == null) { 439 System.out.println("\nUNEXPECTED: Parser says shut down prematurely.\n"); 440 } 441 else if (result instanceof Exception) { 442 throw new com.sun.org.apache.xml.internal.utils.WrappedRuntimeException((Exception)result); 443 // System.out.println("\nParser threw exception:"); 444 // ((Exception)result).printStackTrace(); 445 } 446 447 } 448 449 catch(SAXException e) 450 { 451 e.printStackTrace(); 452 } 453 } 454 455 } 456 457 458 } // class IncrementalSAXSource_Xerces