1 /*
   2  * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
   3  */
   4 /*
   5  * Licensed to the Apache Software Foundation (ASF) under one or more
   6  * contributor license agreements.  See the NOTICE file distributed with
   7  * this work for additional information regarding copyright ownership.
   8  * The ASF licenses this file to You under the Apache License, Version 2.0
   9  * (the "License"); you may not use this file except in compliance with
  10  * the License.  You may obtain a copy of the License at
  11  *
  12  *     http://www.apache.org/licenses/LICENSE-2.0
  13  *
  14  * Unless required by applicable law or agreed to in writing, software
  15  * distributed under the License is distributed on an "AS IS" BASIS,
  16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  17  * See the License for the specific language governing permissions and
  18  * limitations under the License.
  19  */
  20 /*
  21  * $Id: DocumentCache.java,v 1.2.4.1 2005/09/06 06:15:22 pvedula Exp $
  22  */
  23 
  24 package com.sun.org.apache.xalan.internal.xsltc.dom;
  25 
  26 import com.sun.org.apache.xalan.internal.xsltc.DOM;
  27 import com.sun.org.apache.xalan.internal.xsltc.DOMCache;
  28 import com.sun.org.apache.xalan.internal.xsltc.DOMEnhancedForDTM;
  29 import com.sun.org.apache.xalan.internal.xsltc.Translet;
  30 import com.sun.org.apache.xalan.internal.xsltc.runtime.AbstractTranslet;
  31 import com.sun.org.apache.xalan.internal.xsltc.runtime.BasisLibrary;
  32 import com.sun.org.apache.xalan.internal.xsltc.runtime.Constants;
  33 import com.sun.org.apache.xml.internal.utils.SystemIDResolver;
  34 import java.io.File;
  35 import java.io.PrintWriter;
  36 import java.net.URL;
  37 import java.net.URLConnection;
  38 import java.nio.file.Paths;
  39 import java.util.Date;
  40 import java.util.HashMap;
  41 import java.util.Map;
  42 import javax.xml.parsers.ParserConfigurationException;
  43 import javax.xml.parsers.SAXParser;
  44 import javax.xml.parsers.SAXParserFactory;
  45 import javax.xml.transform.TransformerException;
  46 import javax.xml.transform.sax.SAXSource;
  47 import org.xml.sax.InputSource;
  48 import org.xml.sax.SAXException;
  49 import org.xml.sax.XMLReader;
  50 
  51 /**
  52  * @author Morten Jorgensen
  53  */
  54 public final class DocumentCache implements DOMCache {
  55 
  56     private int       _size;
  57     private Map<String, CachedDocument> _references;
  58     private String[]  _URIs;
  59     private int       _count;
  60     private int       _current;
  61     private SAXParser _parser;
  62     private XMLReader _reader;
  63     private XSLTCDTMManager _dtmManager;
  64 
  65     private static final int REFRESH_INTERVAL = 1000;
  66 
  67     /*
  68      * Inner class containing a DOMImpl object and DTD handler
  69      */
  70     public final class CachedDocument {
  71 
  72         // Statistics data
  73         private long _firstReferenced;
  74         private long _lastReferenced;
  75         private long _accessCount;
  76         private long _lastModified;
  77         private long _lastChecked;
  78         private long _buildTime;
  79 
  80         // DOM and DTD handler references
  81         private DOMEnhancedForDTM _dom = null;
  82 
  83         /**
  84          * Constructor - load document and initialise statistics
  85          */
  86         public CachedDocument(String uri) {
  87             // Initialise statistics variables
  88             final long stamp = System.currentTimeMillis();
  89             _firstReferenced = stamp;
  90             _lastReferenced  = stamp;
  91             _accessCount     = 0;
  92             loadDocument(uri);
  93 
  94             _buildTime = System.currentTimeMillis() - stamp;
  95         }
  96 
  97         /**
  98          * Loads the document and updates build-time (latency) statistics
  99          */
 100         public void loadDocument(String uri) {
 101 
 102             try {
 103                 final long stamp = System.currentTimeMillis();
 104                 _dom = (DOMEnhancedForDTM)_dtmManager.getDTM(
 105                                  new SAXSource(_reader, new InputSource(uri)),
 106                                  false, null, true, false);
 107                 _dom.setDocumentURI(uri);
 108 
 109                 // The build time can be used for statistics for a better
 110                 // priority algorithm (currently round robin).
 111                 final long thisTime = System.currentTimeMillis() - stamp;
 112                 if (_buildTime > 0)
 113                     _buildTime = (_buildTime + thisTime) >>> 1;
 114                 else
 115                     _buildTime = thisTime;
 116             }
 117             catch (Exception e) {
 118                 _dom = null;
 119             }
 120         }
 121 
 122         public DOM getDocument()       { return(_dom); }
 123 
 124         public long getFirstReferenced()   { return(_firstReferenced); }
 125 
 126         public long getLastReferenced()    { return(_lastReferenced); }
 127 
 128         public long getAccessCount()       { return(_accessCount); }
 129 
 130         public void incAccessCount()       { _accessCount++; }
 131 
 132         public long getLastModified()      { return(_lastModified); }
 133 
 134         public void setLastModified(long t){ _lastModified = t; }
 135 
 136         public long getLatency()           { return(_buildTime); }
 137 
 138         public long getLastChecked()       { return(_lastChecked); }
 139 
 140         public void setLastChecked(long t) { _lastChecked = t; }
 141 
 142         public long getEstimatedSize() {
 143             if (_dom != null)
 144                 return(_dom.getSize() << 5); // ???
 145             else
 146                 return(0);
 147         }
 148 
 149     }
 150 
 151     /**
 152      * DocumentCache constructor
 153      */
 154     public DocumentCache(int size) throws SAXException {
 155         this(size, null);
 156         try {
 157             _dtmManager = XSLTCDTMManager.createNewDTMManagerInstance();
 158         } catch (Exception e) {
 159             throw new SAXException(e);
 160         }
 161     }
 162 
 163     /**
 164      * DocumentCache constructor
 165      */
 166     public DocumentCache(int size, XSLTCDTMManager dtmManager) throws SAXException {
 167         _dtmManager = dtmManager;
 168         _count = 0;
 169         _current = 0;
 170         _size  = size;
 171         _references = new HashMap<>(_size+2);
 172         _URIs = new String[_size];
 173 
 174         try {
 175             // Create a SAX parser and get the XMLReader object it uses
 176             final SAXParserFactory factory = SAXParserFactory.newInstance();
 177             try {
 178                 factory.setFeature(Constants.NAMESPACE_FEATURE,true);
 179             }
 180             catch (Exception e) {
 181                 factory.setNamespaceAware(true);
 182             }
 183             _parser = factory.newSAXParser();
 184             _reader = _parser.getXMLReader();
 185         }
 186         catch (ParserConfigurationException e) {
 187             BasisLibrary.runTimeError(BasisLibrary.NAMESPACES_SUPPORT_ERR);
 188         }
 189     }
 190 
 191     /**
 192      * Returns the time-stamp for a document's last update
 193      */
 194     private final long getLastModified(String uri) {
 195         try {
 196             URL url = new URL(uri);
 197             URLConnection connection = url.openConnection();
 198             long timestamp = connection.getLastModified();
 199             // Check for a "file:" URI (courtesy of Brian Ewins)
 200             if (timestamp == 0){ // get 0 for local URI
 201                 if ("file".equals(url.getProtocol())){
 202                     File localfile = Paths.get(url.toURI()).toFile();
 203                     timestamp = localfile.lastModified();
 204                 }
 205             }
 206             return(timestamp);
 207         }
 208         // Brutal handling of all exceptions
 209         catch (Exception e) {
 210             return(System.currentTimeMillis());
 211         }
 212     }
 213 
 214     /**
 215      *
 216      */
 217     private CachedDocument lookupDocument(String uri) {
 218         return(_references.get(uri));
 219     }
 220 
 221     /**
 222      *
 223      */
 224     private synchronized void insertDocument(String uri, CachedDocument doc) {
 225         if (_count < _size) {
 226             // Insert out URI in circular buffer
 227             _URIs[_count++] = uri;
 228             _current = 0;
 229         }
 230         else {
 231             // Remove oldest URI from reference map
 232             _references.remove(_URIs[_current]);
 233             // Insert our URI in circular buffer
 234             _URIs[_current] = uri;
 235             if (++_current >= _size) _current = 0;
 236         }
 237         _references.put(uri, doc);
 238     }
 239 
 240     /**
 241      *
 242      */
 243     private synchronized void replaceDocument(String uri, CachedDocument doc) {
 244         if (doc == null)
 245             insertDocument(uri, doc);
 246         else
 247             _references.put(uri, doc);
 248     }
 249 
 250     /**
 251      * Returns a document either by finding it in the cache or
 252      * downloading it and putting it in the cache.
 253      */
 254     @Override
 255     public DOM retrieveDocument(String baseURI, String href, Translet trs) {
 256         CachedDocument doc;
 257 
 258     String uri = href;
 259     if (baseURI != null && !baseURI.equals("")) {
 260         try {
 261             uri = SystemIDResolver.getAbsoluteURI(uri, baseURI);
 262         } catch (TransformerException te) {
 263             // ignore
 264         }
 265     }
 266 
 267         // Try to get the document from the cache first
 268         if ((doc = lookupDocument(uri)) == null) {
 269             doc = new CachedDocument(uri);
 270             if (doc == null) return null; // better error handling needed!!!
 271             doc.setLastModified(getLastModified(uri));
 272             insertDocument(uri, doc);
 273         }
 274         // If the document is in the cache we must check if it is still valid
 275         else {
 276             long now = System.currentTimeMillis();
 277             long chk = doc.getLastChecked();
 278             doc.setLastChecked(now);
 279             // Has the modification time for this file been checked lately?
 280             if (now > (chk + REFRESH_INTERVAL)) {
 281                 doc.setLastChecked(now);
 282                 long last = getLastModified(uri);
 283                 // Reload document if it has been modified since last download
 284                 if (last > doc.getLastModified()) {
 285                     doc = new CachedDocument(uri);
 286                     if (doc == null) return null;
 287                     doc.setLastModified(getLastModified(uri));
 288                     replaceDocument(uri, doc);
 289                 }
 290             }
 291 
 292         }
 293 
 294         // Get the references to the actual DOM and DTD handler
 295         final DOM dom = doc.getDocument();
 296 
 297         // The dom reference may be null if the URL pointed to a
 298         // non-existing document
 299         if (dom == null) return null;
 300 
 301         doc.incAccessCount(); // For statistics
 302 
 303         final AbstractTranslet translet = (AbstractTranslet)trs;
 304 
 305         // Give the translet an early opportunity to extract any
 306         // information from the DOM object that it would like.
 307         translet.prepassDocument(dom);
 308 
 309         return(doc.getDocument());
 310     }
 311 
 312     /**
 313      * Outputs the cache statistics
 314      */
 315     public void getStatistics(PrintWriter out) {
 316         out.println("<h2>DOM cache statistics</h2><center><table border=\"2\">"+
 317                     "<tr><td><b>Document URI</b></td>"+
 318                     "<td><center><b>Build time</b></center></td>"+
 319                     "<td><center><b>Access count</b></center></td>"+
 320                     "<td><center><b>Last accessed</b></center></td>"+
 321                     "<td><center><b>Last modified</b></center></td></tr>");
 322 
 323         for (int i=0; i<_count; i++) {
 324             CachedDocument doc = _references.get(_URIs[i]);
 325             out.print("<tr><td><a href=\""+_URIs[i]+"\">"+
 326                       "<font size=-1>"+_URIs[i]+"</font></a></td>");
 327             out.print("<td><center>"+doc.getLatency()+"ms</center></td>");
 328             out.print("<td><center>"+doc.getAccessCount()+"</center></td>");
 329             out.print("<td><center>"+(new Date(doc.getLastReferenced()))+
 330                       "</center></td>");
 331             out.print("<td><center>"+(new Date(doc.getLastModified()))+
 332                       "</center></td>");
 333             out.println("</tr>");
 334         }
 335 
 336         out.println("</table></center>");
 337     }
 338 }