1. /*
  2. * Copyright 2001-2004 The Apache Software Foundation.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /*
  17. * $Id: DocumentCache.java,v 1.15 2004/02/16 22:54:59 minchau Exp $
  18. */
  19. package com.sun.org.apache.xalan.internal.xsltc.dom;
  20. import java.io.File;
  21. import java.io.PrintWriter;
  22. import java.net.URL;
  23. import java.net.URLConnection;
  24. import java.net.URLDecoder;
  25. import java.util.Date;
  26. import java.util.Hashtable;
  27. import javax.xml.parsers.ParserConfigurationException;
  28. import javax.xml.parsers.SAXParser;
  29. import javax.xml.parsers.SAXParserFactory;
  30. import javax.xml.transform.TransformerException;
  31. import javax.xml.transform.sax.SAXSource;
  32. import com.sun.org.apache.xalan.internal.xsltc.DOM;
  33. import com.sun.org.apache.xalan.internal.xsltc.DOMCache;
  34. import com.sun.org.apache.xalan.internal.xsltc.DOMEnhancedForDTM;
  35. import com.sun.org.apache.xalan.internal.xsltc.Translet;
  36. import com.sun.org.apache.xalan.internal.xsltc.runtime.AbstractTranslet;
  37. import com.sun.org.apache.xalan.internal.xsltc.runtime.BasisLibrary;
  38. import com.sun.org.apache.xalan.internal.xsltc.runtime.Constants;
  39. import com.sun.org.apache.xml.internal.utils.SystemIDResolver;
  40. import org.xml.sax.InputSource;
  41. import org.xml.sax.SAXException;
  42. import org.xml.sax.XMLReader;
  43. /**
  44. * @author Morten Jorgensen
  45. */
  46. public final class DocumentCache implements DOMCache {
  47. private int _size;
  48. private Hashtable _references;
  49. private String[] _URIs;
  50. private int _count;
  51. private int _current;
  52. private SAXParser _parser;
  53. private XMLReader _reader;
  54. private XSLTCDTMManager _dtmManager;
  55. private static final int REFRESH_INTERVAL = 1000;
  56. /*
  57. * Inner class containing a DOMImpl object and DTD handler
  58. */
  59. public final class CachedDocument {
  60. // Statistics data
  61. private long _firstReferenced;
  62. private long _lastReferenced;
  63. private long _accessCount;
  64. private long _lastModified;
  65. private long _lastChecked;
  66. private long _buildTime;
  67. // DOM and DTD handler references
  68. private DOMEnhancedForDTM _dom = null;
  69. /**
  70. * Constructor - load document and initialise statistics
  71. */
  72. public CachedDocument(String uri) {
  73. // Initialise statistics variables
  74. final long stamp = System.currentTimeMillis();
  75. _firstReferenced = stamp;
  76. _lastReferenced = stamp;
  77. _accessCount = 0;
  78. loadDocument(uri);
  79. _buildTime = System.currentTimeMillis() - stamp;
  80. }
  81. /**
  82. * Loads the document and updates build-time (latency) statistics
  83. */
  84. public void loadDocument(String uri) {
  85. try {
  86. final long stamp = System.currentTimeMillis();
  87. _dom = (DOMEnhancedForDTM)_dtmManager.getDTM(
  88. new SAXSource(_reader, new InputSource(uri)),
  89. false, null, true, false);
  90. _dom.setDocumentURI(uri);
  91. // The build time can be used for statistics for a better
  92. // priority algorithm (currently round robin).
  93. final long thisTime = System.currentTimeMillis() - stamp;
  94. if (_buildTime > 0)
  95. _buildTime = (_buildTime + thisTime) >>> 1;
  96. else
  97. _buildTime = thisTime;
  98. }
  99. catch (Exception e) {
  100. _dom = null;
  101. }
  102. }
  103. public DOM getDocument() { return(_dom); }
  104. public long getFirstReferenced() { return(_firstReferenced); }
  105. public long getLastReferenced() { return(_lastReferenced); }
  106. public long getAccessCount() { return(_accessCount); }
  107. public void incAccessCount() { _accessCount++; }
  108. public long getLastModified() { return(_lastModified); }
  109. public void setLastModified(long t){ _lastModified = t; }
  110. public long getLatency() { return(_buildTime); }
  111. public long getLastChecked() { return(_lastChecked); }
  112. public void setLastChecked(long t) { _lastChecked = t; }
  113. public long getEstimatedSize() {
  114. if (_dom != null)
  115. return(_dom.getSize() << 5); // ???
  116. else
  117. return(0);
  118. }
  119. }
  120. /**
  121. * DocumentCache constructor
  122. */
  123. public DocumentCache(int size) throws SAXException {
  124. this(size, null);
  125. try {
  126. _dtmManager = (XSLTCDTMManager)XSLTCDTMManager.getDTMManagerClass()
  127. .newInstance();
  128. } catch (Exception e) {
  129. throw new SAXException(e);
  130. }
  131. }
  132. /**
  133. * DocumentCache constructor
  134. */
  135. public DocumentCache(int size, XSLTCDTMManager dtmManager) throws SAXException {
  136. _dtmManager = dtmManager;
  137. _count = 0;
  138. _current = 0;
  139. _size = size;
  140. _references = new Hashtable(_size+2);
  141. _URIs = new String[_size];
  142. try {
  143. // Create a SAX parser and get the XMLReader object it uses
  144. final SAXParserFactory factory = SAXParserFactory.newInstance();
  145. try {
  146. factory.setFeature(Constants.NAMESPACE_FEATURE,true);
  147. }
  148. catch (Exception e) {
  149. factory.setNamespaceAware(true);
  150. }
  151. _parser = factory.newSAXParser();
  152. _reader = _parser.getXMLReader();
  153. }
  154. catch (ParserConfigurationException e) {
  155. BasisLibrary.runTimeError(BasisLibrary.NAMESPACES_SUPPORT_ERR);
  156. System.exit(-1);
  157. }
  158. }
  159. /**
  160. * Returns the time-stamp for a document's last update
  161. */
  162. private final long getLastModified(String uri) {
  163. try {
  164. URL url = new URL(uri);
  165. URLConnection connection = url.openConnection();
  166. long timestamp = connection.getLastModified();
  167. // Check for a "file:" URI (courtesy of Brian Ewins)
  168. if (timestamp == 0){ // get 0 for local URI
  169. if ("file".equals(url.getProtocol())){
  170. File localfile = new File(URLDecoder.decode(url.getFile()));
  171. timestamp = localfile.lastModified();
  172. }
  173. }
  174. return(timestamp);
  175. }
  176. // Brutal handling of all exceptions
  177. catch (Exception e) {
  178. return(System.currentTimeMillis());
  179. }
  180. }
  181. /**
  182. *
  183. */
  184. private CachedDocument lookupDocument(String uri) {
  185. return((CachedDocument)_references.get(uri));
  186. }
  187. /**
  188. *
  189. */
  190. private synchronized void insertDocument(String uri, CachedDocument doc) {
  191. if (_count < _size) {
  192. // Insert out URI in circular buffer
  193. _URIs[_count++] = uri;
  194. _current = 0;
  195. }
  196. else {
  197. // Remove oldest URI from reference Hashtable
  198. _references.remove(_URIs[_current]);
  199. // Insert our URI in circular buffer
  200. _URIs[_current] = uri;
  201. if (++_current >= _size) _current = 0;
  202. }
  203. _references.put(uri, doc);
  204. }
  205. /**
  206. *
  207. */
  208. private synchronized void replaceDocument(String uri, CachedDocument doc) {
  209. CachedDocument old = (CachedDocument)_references.get(uri);
  210. if (doc == null)
  211. insertDocument(uri, doc);
  212. else
  213. _references.put(uri, doc);
  214. }
  215. /**
  216. * Returns a document either by finding it in the cache or
  217. * downloading it and putting it in the cache.
  218. */
  219. public DOM retrieveDocument(String baseURI, String href, Translet trs) {
  220. CachedDocument doc;
  221. String uri = href;
  222. if (baseURI != null && !baseURI.equals("")) {
  223. try {
  224. uri = SystemIDResolver.getAbsoluteURI(uri, baseURI);
  225. } catch (TransformerException te) {
  226. // ignore
  227. }
  228. }
  229. // Try to get the document from the cache first
  230. if ((doc = lookupDocument(uri)) == null) {
  231. doc = new CachedDocument(uri);
  232. if (doc == null) return null; // better error handling needed!!!
  233. doc.setLastModified(getLastModified(uri));
  234. insertDocument(uri, doc);
  235. }
  236. // If the document is in the cache we must check if it is still valid
  237. else {
  238. long now = System.currentTimeMillis();
  239. long chk = doc.getLastChecked();
  240. doc.setLastChecked(now);
  241. // Has the modification time for this file been checked lately?
  242. if (now > (chk + REFRESH_INTERVAL)) {
  243. doc.setLastChecked(now);
  244. long last = getLastModified(uri);
  245. // Reload document if it has been modified since last download
  246. if (last > doc.getLastModified()) {
  247. doc = new CachedDocument(uri);
  248. if (doc == null) return null;
  249. doc.setLastModified(getLastModified(uri));
  250. replaceDocument(uri, doc);
  251. }
  252. }
  253. }
  254. // Get the references to the actual DOM and DTD handler
  255. final DOM dom = doc.getDocument();
  256. // The dom reference may be null if the URL pointed to a
  257. // non-existing document
  258. if (dom == null) return null;
  259. doc.incAccessCount(); // For statistics
  260. final AbstractTranslet translet = (AbstractTranslet)trs;
  261. // Give the translet an early opportunity to extract any
  262. // information from the DOM object that it would like.
  263. translet.prepassDocument(dom);
  264. return(doc.getDocument());
  265. }
  266. /**
  267. * Outputs the cache statistics
  268. */
  269. public void getStatistics(PrintWriter out) {
  270. out.println("<h2>DOM cache statistics</h2><center><table border=\"2\">"+
  271. "<tr><td><b>Document URI</b></td>"+
  272. "<td><center><b>Build time</b></center></td>"+
  273. "<td><center><b>Access count</b></center></td>"+
  274. "<td><center><b>Last accessed</b></center></td>"+
  275. "<td><center><b>Last modified</b></center></td></tr>");
  276. for (int i=0; i<_count; i++) {
  277. CachedDocument doc = (CachedDocument)_references.get(_URIs[i]);
  278. out.print("<tr><td><a href=\""+_URIs[i]+"\">"+
  279. "<font size=-1>"+_URIs[i]+"</font></a></td>");
  280. out.print("<td><center>"+doc.getLatency()+"ms</center></td>");
  281. out.print("<td><center>"+doc.getAccessCount()+"</center></td>");
  282. out.print("<td><center>"+(new Date(doc.getLastReferenced()))+
  283. "</center></td>");
  284. out.print("<td><center>"+(new Date(doc.getLastModified()))+
  285. "</center></td>");
  286. out.println("</tr>");
  287. }
  288. out.println("</table></center>");
  289. }
  290. }