View Javadoc
1   /*
2    * Copyright (c) 2002-2017 Gargoyle Software Inc.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    * http://www.apache.org/licenses/LICENSE-2.0
8    *
9    * Unless required by applicable law or agreed to in writing, software
10   * distributed under the License is distributed on an "AS IS" BASIS,
11   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12   * See the License for the specific language governing permissions and
13   * limitations under the License.
14   */
15  package com.gargoylesoftware.htmlunit;
16  
17  import java.io.Serializable;
18  import java.net.URL;
19  import java.util.Collections;
20  import java.util.Date;
21  import java.util.HashMap;
22  import java.util.Map;
23  import java.util.regex.Matcher;
24  import java.util.regex.Pattern;
25  
26  import org.apache.http.client.utils.DateUtils;
27  import org.w3c.dom.css.CSSStyleSheet;
28  
29  import com.gargoylesoftware.htmlunit.util.UrlUtils;
30  
31  /**
32   * <p>Simple cache implementation which caches compiled JavaScript files and parsed CSS snippets. Caching
33   * compiled JavaScript files avoids unnecessary web requests and additional compilation overhead, while
34   * caching parsed CSS snippets avoids very expensive CSS parsing.</p>
35   *
36   * @author Marc Guillemot
37   * @author Daniel Gredler
38   * @author Ahmed Ashour
39   */
40  public class Cache implements Serializable {
41  
42      /** The maximum size of the cache. */
43      private int maxSize_ = 40;
44  
45      private static final Pattern DATE_HEADER_PATTERN = Pattern.compile("-?\\d+");
46  
47      /**
48       * The map which holds the cached responses. Note that when keying on URLs, we key on the string version
49       * of the URLs, rather than on the URLs themselves. This is done for performance, because a) the
50       * {@link java.net.URL#hashCode()} method is synchronized, and b) the {@link java.net.URL#hashCode()}
51       * method triggers DNS lookups of the URL hostnames' IPs. As of this writing, the HtmlUnit unit tests
52       * run ~20% faster whey keying on strings rather than on {@link java.net.URL} instances.
53       */
54      private final Map<String, Entry> entries_ = Collections.synchronizedMap(new HashMap<String, Entry>(maxSize_));
55  
56      /**
57       * A cache entry.
58       */
59      private static class Entry implements Comparable<Entry>, Serializable {
60          private final String key_;
61          private WebResponse response_;
62          private Object value_;
63          private long lastAccess_;
64  
65          Entry(final String key, final WebResponse response, final Object value) {
66              key_ = key;
67              response_ = response;
68              value_ = value;
69              lastAccess_ = System.currentTimeMillis();
70          }
71  
72          /**
73           * {@inheritDoc}
74           */
75          @Override
76          public int compareTo(final Entry other) {
77              if (lastAccess_ < other.lastAccess_) {
78                  return -1;
79              }
80              if (lastAccess_ == other.lastAccess_) {
81                  return 0;
82              }
83              return 1;
84          }
85  
86          /**
87           * {@inheritDoc}
88           */
89          @Override
90          public boolean equals(final Object obj) {
91              return obj instanceof Entry && lastAccess_ == ((Entry) obj).lastAccess_;
92          }
93  
94          /**
95           * {@inheritDoc}
96           */
97          @Override
98          public int hashCode() {
99              return ((Long) lastAccess_).hashCode();
100         }
101 
102         /**
103          * Updates the last access date.
104          */
105         public void touch() {
106             lastAccess_ = System.currentTimeMillis();
107         }
108     }
109 
110     /**
111      * Caches the specified object, if the corresponding request and response objects indicate
112      * that it is cacheable.
113      *
114      * @param request the request corresponding to the specified compiled script
115      * @param response the response corresponding to the specified compiled script
116      * @param toCache the object that is to be cached, if possible (may be for instance a compiled script or
117      * simply a WebResponse)
118      * @return whether the response was cached or not
119      */
120     public boolean cacheIfPossible(final WebRequest request, final WebResponse response, final Object toCache) {
121         if (isCacheable(request, response)) {
122             final URL url = response.getWebRequest().getUrl();
123             if (url == null) {
124                 return false;
125             }
126 
127             final Entry entry = new Entry(UrlUtils.normalize(url), response, toCache);
128             entries_.put(entry.key_, entry);
129             deleteOverflow();
130             return true;
131         }
132 
133         return false;
134     }
135 
136     /**
137      * Caches the parsed version of the specified CSS snippet. We key the cache based on CSS snippets (rather
138      * than requests and responses as is done above) because a) this allows us to cache inline CSS, b) CSS is
139      * extremely expensive to parse, so we want to avoid it as much as possible, c) CSS files aren't usually
140      * nearly as large as JavaScript files, so memory bloat won't be too bad, and d) caching on requests and
141      * responses requires checking dynamicity (see {@link #isCacheableContent(WebResponse)}), and headers often
142      * aren't set up correctly, disallowing caching when in fact it should be allowed.
143      *
144      * @param css the CSS snippet from which <tt>styleSheet</tt> is derived
145      * @param styleSheet the parsed version of <tt>css</tt>
146      */
147     public void cache(final String css, final CSSStyleSheet styleSheet) {
148         final Entry entry = new Entry(css, null, styleSheet);
149         entries_.put(entry.key_, entry);
150         deleteOverflow();
151     }
152 
153     /**
154      * Truncates the cache to the maximal number of entries.
155      */
156     protected void deleteOverflow() {
157         synchronized (entries_) {
158             while (entries_.size() > maxSize_) {
159                 final Entry oldestEntry = Collections.min(entries_.values());
160                 entries_.remove(oldestEntry.key_);
161                 if (oldestEntry.response_ != null) {
162                     oldestEntry.response_.cleanUp();
163                 }
164             }
165         }
166     }
167 
168     /**
169      * Determines if the specified response can be cached.
170      *
171      * @param request the performed request
172      * @param response the received response
173      * @return {@code true} if the response can be cached
174      */
175     protected boolean isCacheable(final WebRequest request, final WebResponse response) {
176         return HttpMethod.GET == response.getWebRequest().getHttpMethod()
177             && isCacheableContent(response);
178     }
179 
180     /**
181      * <p>Tries to guess if the content is dynamic or not.</p>
182      *
183      * <p>"Since origin servers do not always provide explicit expiration times, HTTP caches typically
184      * assign heuristic expiration times, employing algorithms that use other header values (such as the
185      * <tt>Last-Modified</tt> time) to estimate a plausible expiration time".</p>
186      *
187      * <p>The current implementation considers as dynamic content everything except responses with a
188      * <tt>Last-Modified</tt> header with a date older than 10 minutes or with an <tt>Expires</tt> header
189      * specifying expiration in more than 10 minutes.</p>
190      *
191      * @see <a href="http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html">RFC 2616</a>
192      * @param response the response to examine
193      * @return {@code true} if the response should be considered as cacheable
194      */
195     protected boolean isCacheableContent(final WebResponse response) {
196         final Date lastModified = parseDateHeader(response, "Last-Modified");
197         final Date expires = parseDateHeader(response, "Expires");
198 
199         final long delay = 10 * org.apache.commons.lang3.time.DateUtils.MILLIS_PER_MINUTE;
200         final long now = getCurrentTimestamp();
201 
202         return expires != null && (expires.getTime() - now > delay)
203                 || (expires == null && lastModified != null && now - lastModified.getTime() > delay);
204     }
205 
206     /**
207      * Gets the current time stamp. As method to allow overriding it, when simulating an other time.
208      * @return the current time stamp
209      */
210     protected long getCurrentTimestamp() {
211         return System.currentTimeMillis();
212     }
213 
214     /**
215      * Parses and returns the specified date header of the specified response. This method
216      * returns {@code null} if the specified header cannot be found or cannot be parsed as a date.
217      *
218      * @param response the response
219      * @param headerName the header name
220      * @return the specified date header of the specified response
221      */
222     protected Date parseDateHeader(final WebResponse response, final String headerName) {
223         final String value = response.getResponseHeaderValue(headerName);
224         if (value == null) {
225             return null;
226         }
227         final Matcher matcher = DATE_HEADER_PATTERN.matcher(value);
228         if (matcher.matches()) {
229             return new Date();
230         }
231         return DateUtils.parseDate(value);
232     }
233 
234     /**
235      * Returns the cached response corresponding to the specified request. If there is
236      * no corresponding cached object, this method returns {@code null}.
237      *
238      * @param request the request whose corresponding response is sought
239      * @return the cached response corresponding to the specified request if any
240      */
241     public WebResponse getCachedResponse(final WebRequest request) {
242         final Entry cachedEntry = getCacheEntry(request);
243         if (cachedEntry == null) {
244             return null;
245         }
246         return cachedEntry.response_;
247     }
248 
249     /**
250      * Returns the cached object corresponding to the specified request. If there is
251      * no corresponding cached object, this method returns {@code null}.
252      *
253      * @param request the request whose corresponding cached compiled script is sought
254      * @return the cached object corresponding to the specified request if any
255      */
256     public Object getCachedObject(final WebRequest request) {
257         final Entry cachedEntry = getCacheEntry(request);
258         if (cachedEntry == null) {
259             return null;
260         }
261         return cachedEntry.value_;
262     }
263 
264     private Entry getCacheEntry(final WebRequest request) {
265         if (HttpMethod.GET != request.getHttpMethod()) {
266             return null;
267         }
268 
269         final URL url = request.getUrl();
270         if (url == null) {
271             return null;
272         }
273         final Entry cachedEntry = entries_.get(UrlUtils.normalize(url));
274         if (cachedEntry == null) {
275             return null;
276         }
277         synchronized (entries_) {
278             cachedEntry.touch();
279         }
280         return cachedEntry;
281     }
282 
283     /**
284      * Returns the cached parsed version of the specified CSS snippet. If there is no
285      * corresponding cached stylesheet, this method returns {@code null}.
286      *
287      * @param css the CSS snippet whose cached stylesheet is sought
288      * @return the cached stylesheet corresponding to the specified CSS snippet
289      */
290     public CSSStyleSheet getCachedStyleSheet(final String css) {
291         final Entry cachedEntry = entries_.get(css);
292         if (cachedEntry == null) {
293             return null;
294         }
295         synchronized (entries_) {
296             cachedEntry.touch();
297         }
298         return (CSSStyleSheet) cachedEntry.value_;
299     }
300 
301     /**
302      * Returns the cache's maximum size. This is the maximum number of files that will
303      * be cached. The default is <tt>25</tt>.
304      *
305      * @return the cache's maximum size
306      */
307     public int getMaxSize() {
308         return maxSize_;
309     }
310 
311     /**
312      * Sets the cache's maximum size. This is the maximum number of files that will
313      * be cached. The default is <tt>25</tt>.
314      *
315      * @param maxSize the cache's maximum size (must be &gt;= 0)
316      */
317     public void setMaxSize(final int maxSize) {
318         if (maxSize < 0) {
319             throw new IllegalArgumentException("Illegal value for maxSize: " + maxSize);
320         }
321         maxSize_ = maxSize;
322         deleteOverflow();
323     }
324 
325     /**
326      * Returns the number of entries in the cache.
327      *
328      * @return the number of entries in the cache
329      */
330     public int getSize() {
331         return entries_.size();
332     }
333 
334     /**
335      * Clears the cache.
336      */
337     public void clear() {
338         synchronized (entries_) {
339             for (final Entry entry : entries_.values()) {
340                 if (entry.response_ != null) {
341                     entry.response_.cleanUp();
342                 }
343             }
344             entries_.clear();
345         }
346     }
347 
348 }