001// License: GPL. For details, see LICENSE file.
002package org.openstreetmap.josm.data.cache;
003
004import java.io.File;
005import java.io.FileNotFoundException;
006import java.io.IOException;
007import java.io.InputStream;
008import java.net.HttpURLConnection;
009import java.net.URL;
010import java.nio.file.Files;
011import java.security.SecureRandom;
012import java.util.Collections;
013import java.util.List;
014import java.util.Map;
015import java.util.Set;
016import java.util.concurrent.ConcurrentHashMap;
017import java.util.concurrent.ConcurrentMap;
018import java.util.concurrent.LinkedBlockingDeque;
019import java.util.concurrent.ThreadPoolExecutor;
020import java.util.concurrent.TimeUnit;
021import java.util.regex.Matcher;
022
023import org.openstreetmap.josm.data.cache.ICachedLoaderListener.LoadResult;
024import org.openstreetmap.josm.data.imagery.TileJobOptions;
025import org.openstreetmap.josm.data.preferences.IntegerProperty;
026import org.openstreetmap.josm.tools.CheckParameterUtil;
027import org.openstreetmap.josm.tools.HttpClient;
028import org.openstreetmap.josm.tools.Logging;
029import org.openstreetmap.josm.tools.Utils;
030
031import org.apache.commons.jcs3.access.behavior.ICacheAccess;
032import org.apache.commons.jcs3.engine.behavior.ICacheElement;
033
034/**
035 * Generic loader for HTTP based tiles. Uses custom attribute, to check, if entry has expired
036 * according to HTTP headers sent with tile. If so, it tries to verify using Etags
037 * or If-Modified-Since / Last-Modified.
038 *
039 * If the tile is not valid, it will try to download it from remote service and put it
040 * to cache. If remote server will fail it will try to use stale entry.
041 *
042 * This class will keep only one Job running for specified tile. All others will just finish, but
043 * listeners will be gathered and notified, once download job will be finished
044 *
045 * @author Wiktor Niesiobędzki
046 * @param <K> cache entry key type
047 * @param <V> cache value type
048 * @since 8168
049 */
050public abstract class JCSCachedTileLoaderJob<K, V extends CacheEntry> implements ICachedLoaderJob<K> {
051    protected static final long DEFAULT_EXPIRE_TIME = TimeUnit.DAYS.toMillis(7);
052    // Limit for the max-age value send by the server.
053    protected static final long EXPIRE_TIME_SERVER_LIMIT = TimeUnit.DAYS.toMillis(28);
054    // Absolute expire time limit. Cached tiles that are older will not be used,
055    // even if the refresh from the server fails.
056    protected static final long ABSOLUTE_EXPIRE_TIME_LIMIT = TimeUnit.DAYS.toMillis(365);
057
058    /**
059     * maximum download threads that will be started
060     */
061    public static final IntegerProperty THREAD_LIMIT = new IntegerProperty("cache.jcs.max_threads", 10);
062
063    /*
064     * ThreadPoolExecutor starts new threads, until THREAD_LIMIT is reached. Then it puts tasks into LinkedBlockingDeque.
065     *
066     * The queue works FIFO, so one needs to take care about ordering of the entries submitted
067     *
068     * There is no point in canceling tasks, that are already taken by worker threads (if we made so much effort, we can at least cache
069     * the response, so later it could be used). We could actually cancel what is in LIFOQueue, but this is a tradeoff between simplicity
070     * and performance (we do want to have something to offer to worker threads before tasks will be resubmitted by class consumer)
071     */
072
073    private static final ThreadPoolExecutor DEFAULT_DOWNLOAD_JOB_DISPATCHER = new ThreadPoolExecutor(
074            1, // we have a small queue, so threads will be quickly started (threads are started only, when queue is full)
075            THREAD_LIMIT.get(), // do not this number of threads
076            30, // keepalive for thread
077            TimeUnit.SECONDS,
078            // make queue of LIFO type - so recently requested tiles will be loaded first (assuming that these are which user is waiting to see)
079            new LinkedBlockingDeque<Runnable>(),
080            Utils.newThreadFactory("JCS-downloader-%d", Thread.NORM_PRIORITY)
081            );
082
083    private static final ConcurrentMap<String, Set<ICachedLoaderListener>> inProgress = new ConcurrentHashMap<>();
084    private static final ConcurrentMap<String, Boolean> useHead = new ConcurrentHashMap<>();
085
086    protected final long now; // when the job started
087
088    private final ICacheAccess<K, V> cache;
089    private ICacheElement<K, V> cacheElement;
090    protected V cacheData;
091    protected CacheEntryAttributes attributes;
092
093    // HTTP connection parameters
094    private final int connectTimeout;
095    private final int readTimeout;
096    private final Map<String, String> headers;
097    private final ThreadPoolExecutor downloadJobExecutor;
098    private Runnable finishTask;
099    private boolean force;
100    private final long minimumExpiryTime;
101
102    /**
103     * @param cache cache instance that we will work on
104     * @param options options of the request
105     * @param downloadJobExecutor that will be executing the jobs
106     */
107    protected JCSCachedTileLoaderJob(ICacheAccess<K, V> cache,
108            TileJobOptions options,
109            ThreadPoolExecutor downloadJobExecutor) {
110        CheckParameterUtil.ensureParameterNotNull(cache, "cache");
111        this.cache = cache;
112        this.now = System.currentTimeMillis();
113        this.connectTimeout = options.getConnectionTimeout();
114        this.readTimeout = options.getReadTimeout();
115        this.headers = options.getHeaders();
116        this.downloadJobExecutor = downloadJobExecutor;
117        this.minimumExpiryTime = TimeUnit.SECONDS.toMillis(options.getMinimumExpiryTime());
118    }
119
120    /**
121     * @param cache cache instance that we will work on
122     * @param options of the request
123     */
124    protected JCSCachedTileLoaderJob(ICacheAccess<K, V> cache,
125            TileJobOptions options) {
126        this(cache, options, DEFAULT_DOWNLOAD_JOB_DISPATCHER);
127    }
128
129    private void ensureCacheElement() {
130        if (cacheElement == null && getCacheKey() != null) {
131            cacheElement = cache.getCacheElement(getCacheKey());
132            if (cacheElement != null) {
133                attributes = (CacheEntryAttributes) cacheElement.getElementAttributes();
134                cacheData = cacheElement.getVal();
135            }
136        }
137    }
138
139    @Override
140    public V get() {
141        ensureCacheElement();
142        return cacheData;
143    }
144
145    @Override
146    public void submit(ICachedLoaderListener listener, boolean force) throws IOException {
147        this.force = force;
148        boolean first = false;
149        URL url = getUrl();
150        String deduplicationKey = null;
151        if (url != null) {
152            // url might be null, for example when Bing Attribution is not loaded yet
153            deduplicationKey = url.toString();
154        }
155        if (deduplicationKey == null) {
156            Logging.warn("No url returned for: {0}, skipping", getCacheKey());
157            throw new IllegalArgumentException("No url returned");
158        }
159        synchronized (this) {
160            first = !inProgress.containsKey(deduplicationKey);
161        }
162        inProgress.computeIfAbsent(deduplicationKey, k -> ConcurrentHashMap.newKeySet()).add(listener);
163
164        if (first || force) {
165            // submit all jobs to separate thread, so calling thread is not blocked with IO when loading from disk
166            Logging.debug("JCS - Submitting job for execution for url: {0}", getUrlNoException());
167            downloadJobExecutor.execute(this);
168        }
169    }
170
171    /**
172     * This method is run when job has finished
173     */
174    protected void executionFinished() {
175        if (finishTask != null) {
176            finishTask.run();
177        }
178    }
179
180    /**
181     * Checks if object from cache has sufficient data to be returned.
182     * @return {@code true} if object from cache has sufficient data to be returned
183     */
184    protected boolean isObjectLoadable() {
185        if (cacheData == null) {
186            return false;
187        }
188        return cacheData.getContent().length > 0;
189    }
190
191    /**
192     * Simple implementation. All errors should be cached as empty. Though some JDK (JDK8 on Windows for example)
193     * doesn't return 4xx error codes, instead they do throw an FileNotFoundException or IOException
194     * @param headerFields headers sent by server
195     * @param responseCode http status code
196     *
197     * @return true if we should put empty object into cache, regardless of what remote resource has returned
198     */
199    protected boolean cacheAsEmpty(Map<String, List<String>> headerFields, int responseCode) {
200        return attributes.getResponseCode() < 500;
201    }
202
203    /**
204     * Returns key under which discovered server settings will be kept.
205     * @return key under which discovered server settings will be kept
206     */
207    protected String getServerKey() {
208        try {
209            return getUrl().getHost();
210        } catch (IOException e) {
211            Logging.trace(e);
212            return null;
213        }
214    }
215
216    @Override
217    public void run() {
218        final Thread currentThread = Thread.currentThread();
219        final String oldName = currentThread.getName();
220        currentThread.setName("JCS Downloading: " + getUrlNoException());
221        Logging.debug("JCS - starting fetch of url: {0} ", getUrlNoException());
222        ensureCacheElement();
223        try {
224            // try to fetch from cache
225            if (!force && cacheElement != null && isCacheElementValid() && isObjectLoadable()) {
226                // we got something in cache, and it's valid, so lets return it
227                Logging.debug("JCS - Returning object from cache: {0}", getCacheKey());
228                finishLoading(LoadResult.SUCCESS);
229                return;
230            }
231
232            // try to load object from remote resource
233            if (loadObject()) {
234                finishLoading(LoadResult.SUCCESS);
235            } else {
236                // if loading failed - check if we can return stale entry
237                if (isObjectLoadable()) {
238                    // try to get stale entry in cache
239                    finishLoading(LoadResult.SUCCESS);
240                    Logging.debug("JCS - found stale object in cache: {0}", getUrlNoException());
241                } else {
242                    // failed completely
243                    finishLoading(LoadResult.FAILURE);
244                }
245            }
246        } finally {
247            executionFinished();
248            currentThread.setName(oldName);
249        }
250    }
251
252    private void finishLoading(LoadResult result) {
253        Set<ICachedLoaderListener> listeners;
254        try {
255            listeners = inProgress.remove(getUrl().toString());
256        } catch (IOException e) {
257            listeners = null;
258            Logging.trace(e);
259        }
260        if (listeners == null) {
261            Logging.warn("Listener not found for URL: {0}. Listener not notified!", getUrlNoException());
262            return;
263        }
264        for (ICachedLoaderListener l: listeners) {
265            l.loadingFinished(cacheData, attributes, result);
266        }
267    }
268
269    protected boolean isCacheElementValid() {
270        long expires = attributes.getExpirationTime();
271
272        // check by expire date set by server
273        if (expires != 0L) {
274            // put a limit to the expire time (some servers send a value
275            // that is too large)
276            expires = Math.min(expires, attributes.getCreateTime() + Math.max(EXPIRE_TIME_SERVER_LIMIT, minimumExpiryTime));
277            if (now > expires) {
278                Logging.debug("JCS - Object {0} has expired -> valid to {1}, now is: {2}",
279                        getUrlNoException(), Long.toString(expires), Long.toString(now));
280                return false;
281            }
282        } else if (attributes.getLastModification() > 0 &&
283                now - attributes.getLastModification() > Math.max(DEFAULT_EXPIRE_TIME, minimumExpiryTime)) {
284            // check by file modification date
285            Logging.debug("JCS - Object has expired, maximum file age reached {0}", getUrlNoException());
286            return false;
287        } else if (now - attributes.getCreateTime() > Math.max(DEFAULT_EXPIRE_TIME, minimumExpiryTime)) {
288            Logging.debug("JCS - Object has expired, maximum time since object creation reached {0}", getUrlNoException());
289            return false;
290        }
291        return true;
292    }
293
294    /**
295     * @return true if object was successfully downloaded, false, if there was a loading failure
296     */
297    private boolean loadObject() {
298        if (attributes == null) {
299            attributes = new CacheEntryAttributes();
300        }
301        final URL url = this.getUrlNoException();
302        if (url == null) {
303            return false;
304        }
305
306        if (url.getProtocol().contains("http")) {
307            return loadObjectHttp();
308        }
309        if (url.getProtocol().contains("file")) {
310            return loadObjectFile(url);
311        }
312
313        return false;
314    }
315
316    private boolean loadObjectFile(URL url) {
317        String fileName = url.toExternalForm();
318        File file = new File(fileName.substring("file:/".length() - 1));
319        if (!file.exists()) {
320            file = new File(fileName.substring("file://".length() - 1));
321        }
322        try (InputStream fileInputStream = Files.newInputStream(file.toPath())) {
323            cacheData = createCacheEntry(Utils.readBytesFromStream(fileInputStream));
324            cache.put(getCacheKey(), cacheData, attributes);
325            return true;
326        } catch (IOException e) {
327            Logging.error(e);
328            attributes.setError(e);
329            attributes.setException(e);
330        }
331        return false;
332    }
333
334    /**
335     * @return true if object was successfully downloaded via http, false, if there was a loading failure
336     */
337    private boolean loadObjectHttp() {
338        try {
339            // if we have object in cache, and host doesn't support If-Modified-Since nor If-None-Match
340            // then just use HEAD request and check returned values
341            if (isObjectLoadable() &&
342                    Boolean.TRUE.equals(useHead.get(getServerKey())) &&
343                    isCacheValidUsingHead()) {
344                Logging.debug("JCS - cache entry verified using HEAD request: {0}", getUrl());
345                return true;
346            }
347
348            Logging.debug("JCS - starting HttpClient GET request for URL: {0}", getUrl());
349            final HttpClient request = getRequest("GET");
350
351            if (isObjectLoadable() &&
352                    (now - attributes.getLastModification()) <= ABSOLUTE_EXPIRE_TIME_LIMIT) {
353                request.setIfModifiedSince(attributes.getLastModification());
354            }
355            if (isObjectLoadable() && attributes.getEtag() != null) {
356                request.setHeader("If-None-Match", attributes.getEtag());
357            }
358
359            final HttpClient.Response urlConn = request.connect();
360
361            if (urlConn.getResponseCode() == 304) {
362                // If isModifiedSince or If-None-Match has been set
363                // and the server answers with a HTTP 304 = "Not Modified"
364                Logging.debug("JCS - If-Modified-Since/ETag test: local version is up to date: {0}", getUrl());
365                // update cache attributes
366                attributes = parseHeaders(urlConn);
367                cache.put(getCacheKey(), cacheData, attributes);
368                return true;
369            } else if (isObjectLoadable() // we have an object in cache, but we haven't received 304 response code
370                    && (
371                            (attributes.getEtag() != null && attributes.getEtag().equals(urlConn.getHeaderField("ETag"))) ||
372                            attributes.getLastModification() == urlConn.getLastModified())
373                    ) {
374                // we sent ETag or If-Modified-Since, but didn't get 304 response code
375                // for further requests - use HEAD
376                String serverKey = getServerKey();
377                Logging.info("JCS - Host: {0} found not to return 304 codes for If-Modified-Since or If-None-Match headers",
378                        serverKey);
379                useHead.put(serverKey, Boolean.TRUE);
380            }
381
382            attributes = parseHeaders(urlConn);
383
384            for (int i = 0; i < 5; ++i) {
385                if (urlConn.getResponseCode() == HttpURLConnection.HTTP_UNAVAILABLE) {
386                    Thread.sleep(5000L+new SecureRandom().nextInt(5000));
387                    continue;
388                }
389
390                attributes.setResponseCode(urlConn.getResponseCode());
391                byte[] raw;
392                if (urlConn.getResponseCode() == HttpURLConnection.HTTP_OK) {
393                    raw = Utils.readBytesFromStream(urlConn.getContent());
394                } else {
395                    raw = new byte[]{};
396                    try {
397                        String data = urlConn.fetchContent();
398                        if (!data.isEmpty()) {
399                            String detectErrorMessage = detectErrorMessage(data);
400                            if (detectErrorMessage != null) {
401                                attributes.setErrorMessage(detectErrorMessage);
402                            }
403                        }
404                    } catch (IOException e) {
405                        Logging.warn(e);
406                    }
407                }
408
409                if (isResponseLoadable(urlConn.getHeaderFields(), urlConn.getResponseCode(), raw)) {
410                    // we need to check cacheEmpty, so for cases, when data is returned, but we want to store
411                    // as empty (eg. empty tile images) to save some space
412                    cacheData = createCacheEntry(raw);
413                    cache.put(getCacheKey(), cacheData, attributes);
414                    Logging.debug("JCS - downloaded key: {0}, length: {1}, url: {2}",
415                            getCacheKey(), raw.length, getUrl());
416                    return true;
417                } else if (cacheAsEmpty(urlConn.getHeaderFields(), urlConn.getResponseCode())) {
418                    cacheData = createCacheEntry(new byte[]{});
419                    cache.put(getCacheKey(), cacheData, attributes);
420                    Logging.debug("JCS - Caching empty object {0}", getUrl());
421                    return true;
422                } else {
423                    Logging.debug("JCS - failure during load - response is not loadable nor cached as empty");
424                    return false;
425                }
426            }
427        } catch (FileNotFoundException e) {
428            Logging.debug("JCS - Caching empty object as server returned 404 for: {0}", getUrlNoException());
429            attributes.setResponseCode(404);
430            attributes.setError(e);
431            attributes.setException(e);
432            boolean doCache = isResponseLoadable(null, 404, null) || cacheAsEmpty(Collections.emptyMap(), 404);
433            if (doCache) {
434                cacheData = createCacheEntry(new byte[]{});
435                cache.put(getCacheKey(), cacheData, attributes);
436            }
437            return doCache;
438        } catch (IOException e) {
439            Logging.debug("JCS - IOException during communication with server for: {0}", getUrlNoException());
440            if (isObjectLoadable()) {
441                return true;
442            } else {
443                attributes.setError(e);
444                attributes.setException(e);
445                attributes.setResponseCode(599); // set dummy error code, greater than 500 so it will be not cached
446                return false;
447            }
448
449        } catch (InterruptedException e) {
450            attributes.setError(e);
451            attributes.setException(e);
452            Logging.logWithStackTrace(Logging.LEVEL_WARN, e, "JCS - Exception during download {0}", getUrlNoException());
453            Thread.currentThread().interrupt();
454        }
455        Logging.warn("JCS - Silent failure during download: {0}", getUrlNoException());
456        return false;
457    }
458
459    /**
460     * Tries do detect an error message from given string.
461     * @param data string to analyze
462     * @return error message if detected, or null
463     * @since 14535
464     */
465    public String detectErrorMessage(String data) {
466        Matcher m = HttpClient.getTomcatErrorMatcher(data);
467        return m.matches() ? m.group(1).replace("'", "''") : null;
468    }
469
470    /**
471     * Check if the object is loadable. This means, if the data will be parsed, and if this response
472     * will finish as successful retrieve.
473     *
474     * This simple implementation doesn't load empty response, nor client (4xx) and server (5xx) errors
475     *
476     * @param headerFields headers sent by server
477     * @param responseCode http status code
478     * @param raw data read from server
479     * @return true if object should be cached and returned to listener
480     */
481    protected boolean isResponseLoadable(Map<String, List<String>> headerFields, int responseCode, byte[] raw) {
482        return raw != null && raw.length != 0 && responseCode < 400;
483    }
484
485    protected abstract V createCacheEntry(byte[] content);
486
487    protected CacheEntryAttributes parseHeaders(HttpClient.Response urlConn) {
488        CacheEntryAttributes ret = new CacheEntryAttributes();
489
490        /*
491         * according to https://www.ietf.org/rfc/rfc2616.txt Cache-Control takes precedence over max-age
492         * max-age is for private caches, s-max-age is for shared caches. We take any value that is larger
493         */
494        Long expiration = 0L;
495        String cacheControl = urlConn.getHeaderField("Cache-Control");
496        if (cacheControl != null) {
497            for (String token: cacheControl.split(",", -1)) {
498                try {
499                    if (token.startsWith("max-age=")) {
500                        expiration = Math.max(expiration,
501                                TimeUnit.SECONDS.toMillis(Long.parseLong(token.substring("max-age=".length())))
502                                + System.currentTimeMillis()
503                                );
504                    }
505                    if (token.startsWith("s-max-age=")) {
506                        expiration = Math.max(expiration,
507                                TimeUnit.SECONDS.toMillis(Long.parseLong(token.substring("s-max-age=".length())))
508                                + System.currentTimeMillis()
509                                );
510                    }
511                } catch (NumberFormatException e) {
512                    // ignore malformed Cache-Control headers
513                    Logging.trace(e);
514                }
515            }
516        }
517
518        if (expiration.equals(0L)) {
519            expiration = urlConn.getExpiration();
520        }
521
522        // if nothing is found - set default
523        if (expiration.equals(0L)) {
524            expiration = System.currentTimeMillis() + DEFAULT_EXPIRE_TIME;
525        }
526
527        ret.setExpirationTime(Math.max(minimumExpiryTime + System.currentTimeMillis(), expiration));
528        ret.setLastModification(now);
529        ret.setEtag(urlConn.getHeaderField("ETag"));
530
531        return ret;
532    }
533
534    private HttpClient getRequest(String requestMethod) throws IOException {
535        final HttpClient urlConn = HttpClient.create(getUrl(), requestMethod);
536        urlConn.setAccept("text/html, image/png, image/jpeg, image/gif, */*");
537        urlConn.setReadTimeout(readTimeout); // 30 seconds read timeout
538        urlConn.setConnectTimeout(connectTimeout);
539        if (headers != null) {
540            urlConn.setHeaders(headers);
541        }
542
543        final boolean noCache = force
544                // To remove when switching to Java 11
545                // Workaround for https://bugs.openjdk.java.net/browse/JDK-8146450
546                || (Utils.getJavaVersion() == 8 && Utils.isRunningJavaWebStart());
547        urlConn.useCache(!noCache);
548
549        return urlConn;
550    }
551
552    private boolean isCacheValidUsingHead() throws IOException {
553        final HttpClient.Response urlConn = getRequest("HEAD").connect();
554        long lastModified = urlConn.getLastModified();
555        boolean ret = (attributes.getEtag() != null && attributes.getEtag().equals(urlConn.getHeaderField("ETag"))) ||
556                (lastModified != 0 && lastModified <= attributes.getLastModification());
557        if (ret) {
558            // update attributes
559            attributes = parseHeaders(urlConn);
560            cache.put(getCacheKey(), cacheData, attributes);
561        }
562        return ret;
563    }
564
565    /**
566     * TODO: move to JobFactory
567     * cancels all outstanding tasks in the queue.
568     */
569    public void cancelOutstandingTasks() {
570        for (Runnable r: downloadJobExecutor.getQueue()) {
571            if (downloadJobExecutor.remove(r) && r instanceof JCSCachedTileLoaderJob) {
572                ((JCSCachedTileLoaderJob<?, ?>) r).handleJobCancellation();
573            }
574        }
575    }
576
577    /**
578     * Sets a job, that will be run, when job will finish execution
579     * @param runnable that will be executed
580     */
581    public void setFinishedTask(Runnable runnable) {
582        this.finishTask = runnable;
583
584    }
585
586    /**
587     * Marks this job as canceled
588     */
589    public void handleJobCancellation() {
590        finishLoading(LoadResult.CANCELED);
591    }
592
593    private URL getUrlNoException() {
594        try {
595            return getUrl();
596        } catch (IOException e) {
597            Logging.trace(e);
598            return null;
599        }
600    }
601}