001// License: GPL. For details, see LICENSE file. 002package org.openstreetmap.josm.data.cache; 003 004import java.io.File; 005import java.io.FileNotFoundException; 006import java.io.IOException; 007import java.io.InputStream; 008import java.net.HttpURLConnection; 009import java.net.URL; 010import java.nio.file.Files; 011import java.security.SecureRandom; 012import java.util.Collections; 013import java.util.List; 014import java.util.Map; 015import java.util.Set; 016import java.util.concurrent.ConcurrentHashMap; 017import java.util.concurrent.ConcurrentMap; 018import java.util.concurrent.LinkedBlockingDeque; 019import java.util.concurrent.ThreadPoolExecutor; 020import java.util.concurrent.TimeUnit; 021import java.util.regex.Matcher; 022 023import org.openstreetmap.josm.data.cache.ICachedLoaderListener.LoadResult; 024import org.openstreetmap.josm.data.imagery.TileJobOptions; 025import org.openstreetmap.josm.data.preferences.IntegerProperty; 026import org.openstreetmap.josm.tools.CheckParameterUtil; 027import org.openstreetmap.josm.tools.HttpClient; 028import org.openstreetmap.josm.tools.Logging; 029import org.openstreetmap.josm.tools.Utils; 030 031import org.apache.commons.jcs3.access.behavior.ICacheAccess; 032import org.apache.commons.jcs3.engine.behavior.ICacheElement; 033 034/** 035 * Generic loader for HTTP based tiles. Uses custom attribute, to check, if entry has expired 036 * according to HTTP headers sent with tile. If so, it tries to verify using Etags 037 * or If-Modified-Since / Last-Modified. 038 * 039 * If the tile is not valid, it will try to download it from remote service and put it 040 * to cache. If remote server will fail it will try to use stale entry. 041 * 042 * This class will keep only one Job running for specified tile. All others will just finish, but 043 * listeners will be gathered and notified, once download job will be finished 044 * 045 * @author Wiktor Niesiobędzki 046 * @param <K> cache entry key type 047 * @param <V> cache value type 048 * @since 8168 049 */ 050public abstract class JCSCachedTileLoaderJob<K, V extends CacheEntry> implements ICachedLoaderJob<K> { 051 protected static final long DEFAULT_EXPIRE_TIME = TimeUnit.DAYS.toMillis(7); 052 // Limit for the max-age value send by the server. 053 protected static final long EXPIRE_TIME_SERVER_LIMIT = TimeUnit.DAYS.toMillis(28); 054 // Absolute expire time limit. Cached tiles that are older will not be used, 055 // even if the refresh from the server fails. 056 protected static final long ABSOLUTE_EXPIRE_TIME_LIMIT = TimeUnit.DAYS.toMillis(365); 057 058 /** 059 * maximum download threads that will be started 060 */ 061 public static final IntegerProperty THREAD_LIMIT = new IntegerProperty("cache.jcs.max_threads", 10); 062 063 /* 064 * ThreadPoolExecutor starts new threads, until THREAD_LIMIT is reached. Then it puts tasks into LinkedBlockingDeque. 065 * 066 * The queue works FIFO, so one needs to take care about ordering of the entries submitted 067 * 068 * There is no point in canceling tasks, that are already taken by worker threads (if we made so much effort, we can at least cache 069 * the response, so later it could be used). We could actually cancel what is in LIFOQueue, but this is a tradeoff between simplicity 070 * and performance (we do want to have something to offer to worker threads before tasks will be resubmitted by class consumer) 071 */ 072 073 private static final ThreadPoolExecutor DEFAULT_DOWNLOAD_JOB_DISPATCHER = new ThreadPoolExecutor( 074 1, // we have a small queue, so threads will be quickly started (threads are started only, when queue is full) 075 THREAD_LIMIT.get(), // do not this number of threads 076 30, // keepalive for thread 077 TimeUnit.SECONDS, 078 // make queue of LIFO type - so recently requested tiles will be loaded first (assuming that these are which user is waiting to see) 079 new LinkedBlockingDeque<Runnable>(), 080 Utils.newThreadFactory("JCS-downloader-%d", Thread.NORM_PRIORITY) 081 ); 082 083 private static final ConcurrentMap<String, Set<ICachedLoaderListener>> inProgress = new ConcurrentHashMap<>(); 084 private static final ConcurrentMap<String, Boolean> useHead = new ConcurrentHashMap<>(); 085 086 protected final long now; // when the job started 087 088 private final ICacheAccess<K, V> cache; 089 private ICacheElement<K, V> cacheElement; 090 protected V cacheData; 091 protected CacheEntryAttributes attributes; 092 093 // HTTP connection parameters 094 private final int connectTimeout; 095 private final int readTimeout; 096 private final Map<String, String> headers; 097 private final ThreadPoolExecutor downloadJobExecutor; 098 private Runnable finishTask; 099 private boolean force; 100 private final long minimumExpiryTime; 101 102 /** 103 * @param cache cache instance that we will work on 104 * @param options options of the request 105 * @param downloadJobExecutor that will be executing the jobs 106 */ 107 protected JCSCachedTileLoaderJob(ICacheAccess<K, V> cache, 108 TileJobOptions options, 109 ThreadPoolExecutor downloadJobExecutor) { 110 CheckParameterUtil.ensureParameterNotNull(cache, "cache"); 111 this.cache = cache; 112 this.now = System.currentTimeMillis(); 113 this.connectTimeout = options.getConnectionTimeout(); 114 this.readTimeout = options.getReadTimeout(); 115 this.headers = options.getHeaders(); 116 this.downloadJobExecutor = downloadJobExecutor; 117 this.minimumExpiryTime = TimeUnit.SECONDS.toMillis(options.getMinimumExpiryTime()); 118 } 119 120 /** 121 * @param cache cache instance that we will work on 122 * @param options of the request 123 */ 124 protected JCSCachedTileLoaderJob(ICacheAccess<K, V> cache, 125 TileJobOptions options) { 126 this(cache, options, DEFAULT_DOWNLOAD_JOB_DISPATCHER); 127 } 128 129 private void ensureCacheElement() { 130 if (cacheElement == null && getCacheKey() != null) { 131 cacheElement = cache.getCacheElement(getCacheKey()); 132 if (cacheElement != null) { 133 attributes = (CacheEntryAttributes) cacheElement.getElementAttributes(); 134 cacheData = cacheElement.getVal(); 135 } 136 } 137 } 138 139 @Override 140 public V get() { 141 ensureCacheElement(); 142 return cacheData; 143 } 144 145 @Override 146 public void submit(ICachedLoaderListener listener, boolean force) throws IOException { 147 this.force = force; 148 boolean first = false; 149 URL url = getUrl(); 150 String deduplicationKey = null; 151 if (url != null) { 152 // url might be null, for example when Bing Attribution is not loaded yet 153 deduplicationKey = url.toString(); 154 } 155 if (deduplicationKey == null) { 156 Logging.warn("No url returned for: {0}, skipping", getCacheKey()); 157 throw new IllegalArgumentException("No url returned"); 158 } 159 synchronized (this) { 160 first = !inProgress.containsKey(deduplicationKey); 161 } 162 inProgress.computeIfAbsent(deduplicationKey, k -> ConcurrentHashMap.newKeySet()).add(listener); 163 164 if (first || force) { 165 // submit all jobs to separate thread, so calling thread is not blocked with IO when loading from disk 166 Logging.debug("JCS - Submitting job for execution for url: {0}", getUrlNoException()); 167 downloadJobExecutor.execute(this); 168 } 169 } 170 171 /** 172 * This method is run when job has finished 173 */ 174 protected void executionFinished() { 175 if (finishTask != null) { 176 finishTask.run(); 177 } 178 } 179 180 /** 181 * Checks if object from cache has sufficient data to be returned. 182 * @return {@code true} if object from cache has sufficient data to be returned 183 */ 184 protected boolean isObjectLoadable() { 185 if (cacheData == null) { 186 return false; 187 } 188 return cacheData.getContent().length > 0; 189 } 190 191 /** 192 * Simple implementation. All errors should be cached as empty. Though some JDK (JDK8 on Windows for example) 193 * doesn't return 4xx error codes, instead they do throw an FileNotFoundException or IOException 194 * @param headerFields headers sent by server 195 * @param responseCode http status code 196 * 197 * @return true if we should put empty object into cache, regardless of what remote resource has returned 198 */ 199 protected boolean cacheAsEmpty(Map<String, List<String>> headerFields, int responseCode) { 200 return attributes.getResponseCode() < 500; 201 } 202 203 /** 204 * Returns key under which discovered server settings will be kept. 205 * @return key under which discovered server settings will be kept 206 */ 207 protected String getServerKey() { 208 try { 209 return getUrl().getHost(); 210 } catch (IOException e) { 211 Logging.trace(e); 212 return null; 213 } 214 } 215 216 @Override 217 public void run() { 218 final Thread currentThread = Thread.currentThread(); 219 final String oldName = currentThread.getName(); 220 currentThread.setName("JCS Downloading: " + getUrlNoException()); 221 Logging.debug("JCS - starting fetch of url: {0} ", getUrlNoException()); 222 ensureCacheElement(); 223 try { 224 // try to fetch from cache 225 if (!force && cacheElement != null && isCacheElementValid() && isObjectLoadable()) { 226 // we got something in cache, and it's valid, so lets return it 227 Logging.debug("JCS - Returning object from cache: {0}", getCacheKey()); 228 finishLoading(LoadResult.SUCCESS); 229 return; 230 } 231 232 // try to load object from remote resource 233 if (loadObject()) { 234 finishLoading(LoadResult.SUCCESS); 235 } else { 236 // if loading failed - check if we can return stale entry 237 if (isObjectLoadable()) { 238 // try to get stale entry in cache 239 finishLoading(LoadResult.SUCCESS); 240 Logging.debug("JCS - found stale object in cache: {0}", getUrlNoException()); 241 } else { 242 // failed completely 243 finishLoading(LoadResult.FAILURE); 244 } 245 } 246 } finally { 247 executionFinished(); 248 currentThread.setName(oldName); 249 } 250 } 251 252 private void finishLoading(LoadResult result) { 253 Set<ICachedLoaderListener> listeners; 254 try { 255 listeners = inProgress.remove(getUrl().toString()); 256 } catch (IOException e) { 257 listeners = null; 258 Logging.trace(e); 259 } 260 if (listeners == null) { 261 Logging.warn("Listener not found for URL: {0}. Listener not notified!", getUrlNoException()); 262 return; 263 } 264 for (ICachedLoaderListener l: listeners) { 265 l.loadingFinished(cacheData, attributes, result); 266 } 267 } 268 269 protected boolean isCacheElementValid() { 270 long expires = attributes.getExpirationTime(); 271 272 // check by expire date set by server 273 if (expires != 0L) { 274 // put a limit to the expire time (some servers send a value 275 // that is too large) 276 expires = Math.min(expires, attributes.getCreateTime() + Math.max(EXPIRE_TIME_SERVER_LIMIT, minimumExpiryTime)); 277 if (now > expires) { 278 Logging.debug("JCS - Object {0} has expired -> valid to {1}, now is: {2}", 279 getUrlNoException(), Long.toString(expires), Long.toString(now)); 280 return false; 281 } 282 } else if (attributes.getLastModification() > 0 && 283 now - attributes.getLastModification() > Math.max(DEFAULT_EXPIRE_TIME, minimumExpiryTime)) { 284 // check by file modification date 285 Logging.debug("JCS - Object has expired, maximum file age reached {0}", getUrlNoException()); 286 return false; 287 } else if (now - attributes.getCreateTime() > Math.max(DEFAULT_EXPIRE_TIME, minimumExpiryTime)) { 288 Logging.debug("JCS - Object has expired, maximum time since object creation reached {0}", getUrlNoException()); 289 return false; 290 } 291 return true; 292 } 293 294 /** 295 * @return true if object was successfully downloaded, false, if there was a loading failure 296 */ 297 private boolean loadObject() { 298 if (attributes == null) { 299 attributes = new CacheEntryAttributes(); 300 } 301 final URL url = this.getUrlNoException(); 302 if (url == null) { 303 return false; 304 } 305 306 if (url.getProtocol().contains("http")) { 307 return loadObjectHttp(); 308 } 309 if (url.getProtocol().contains("file")) { 310 return loadObjectFile(url); 311 } 312 313 return false; 314 } 315 316 private boolean loadObjectFile(URL url) { 317 String fileName = url.toExternalForm(); 318 File file = new File(fileName.substring("file:/".length() - 1)); 319 if (!file.exists()) { 320 file = new File(fileName.substring("file://".length() - 1)); 321 } 322 try (InputStream fileInputStream = Files.newInputStream(file.toPath())) { 323 cacheData = createCacheEntry(Utils.readBytesFromStream(fileInputStream)); 324 cache.put(getCacheKey(), cacheData, attributes); 325 return true; 326 } catch (IOException e) { 327 Logging.error(e); 328 attributes.setError(e); 329 attributes.setException(e); 330 } 331 return false; 332 } 333 334 /** 335 * @return true if object was successfully downloaded via http, false, if there was a loading failure 336 */ 337 private boolean loadObjectHttp() { 338 try { 339 // if we have object in cache, and host doesn't support If-Modified-Since nor If-None-Match 340 // then just use HEAD request and check returned values 341 if (isObjectLoadable() && 342 Boolean.TRUE.equals(useHead.get(getServerKey())) && 343 isCacheValidUsingHead()) { 344 Logging.debug("JCS - cache entry verified using HEAD request: {0}", getUrl()); 345 return true; 346 } 347 348 Logging.debug("JCS - starting HttpClient GET request for URL: {0}", getUrl()); 349 final HttpClient request = getRequest("GET"); 350 351 if (isObjectLoadable() && 352 (now - attributes.getLastModification()) <= ABSOLUTE_EXPIRE_TIME_LIMIT) { 353 request.setIfModifiedSince(attributes.getLastModification()); 354 } 355 if (isObjectLoadable() && attributes.getEtag() != null) { 356 request.setHeader("If-None-Match", attributes.getEtag()); 357 } 358 359 final HttpClient.Response urlConn = request.connect(); 360 361 if (urlConn.getResponseCode() == 304) { 362 // If isModifiedSince or If-None-Match has been set 363 // and the server answers with a HTTP 304 = "Not Modified" 364 Logging.debug("JCS - If-Modified-Since/ETag test: local version is up to date: {0}", getUrl()); 365 // update cache attributes 366 attributes = parseHeaders(urlConn); 367 cache.put(getCacheKey(), cacheData, attributes); 368 return true; 369 } else if (isObjectLoadable() // we have an object in cache, but we haven't received 304 response code 370 && ( 371 (attributes.getEtag() != null && attributes.getEtag().equals(urlConn.getHeaderField("ETag"))) || 372 attributes.getLastModification() == urlConn.getLastModified()) 373 ) { 374 // we sent ETag or If-Modified-Since, but didn't get 304 response code 375 // for further requests - use HEAD 376 String serverKey = getServerKey(); 377 Logging.info("JCS - Host: {0} found not to return 304 codes for If-Modified-Since or If-None-Match headers", 378 serverKey); 379 useHead.put(serverKey, Boolean.TRUE); 380 } 381 382 attributes = parseHeaders(urlConn); 383 384 for (int i = 0; i < 5; ++i) { 385 if (urlConn.getResponseCode() == HttpURLConnection.HTTP_UNAVAILABLE) { 386 Thread.sleep(5000L+new SecureRandom().nextInt(5000)); 387 continue; 388 } 389 390 attributes.setResponseCode(urlConn.getResponseCode()); 391 byte[] raw; 392 if (urlConn.getResponseCode() == HttpURLConnection.HTTP_OK) { 393 raw = Utils.readBytesFromStream(urlConn.getContent()); 394 } else { 395 raw = new byte[]{}; 396 try { 397 String data = urlConn.fetchContent(); 398 if (!data.isEmpty()) { 399 String detectErrorMessage = detectErrorMessage(data); 400 if (detectErrorMessage != null) { 401 attributes.setErrorMessage(detectErrorMessage); 402 } 403 } 404 } catch (IOException e) { 405 Logging.warn(e); 406 } 407 } 408 409 if (isResponseLoadable(urlConn.getHeaderFields(), urlConn.getResponseCode(), raw)) { 410 // we need to check cacheEmpty, so for cases, when data is returned, but we want to store 411 // as empty (eg. empty tile images) to save some space 412 cacheData = createCacheEntry(raw); 413 cache.put(getCacheKey(), cacheData, attributes); 414 Logging.debug("JCS - downloaded key: {0}, length: {1}, url: {2}", 415 getCacheKey(), raw.length, getUrl()); 416 return true; 417 } else if (cacheAsEmpty(urlConn.getHeaderFields(), urlConn.getResponseCode())) { 418 cacheData = createCacheEntry(new byte[]{}); 419 cache.put(getCacheKey(), cacheData, attributes); 420 Logging.debug("JCS - Caching empty object {0}", getUrl()); 421 return true; 422 } else { 423 Logging.debug("JCS - failure during load - response is not loadable nor cached as empty"); 424 return false; 425 } 426 } 427 } catch (FileNotFoundException e) { 428 Logging.debug("JCS - Caching empty object as server returned 404 for: {0}", getUrlNoException()); 429 attributes.setResponseCode(404); 430 attributes.setError(e); 431 attributes.setException(e); 432 boolean doCache = isResponseLoadable(null, 404, null) || cacheAsEmpty(Collections.emptyMap(), 404); 433 if (doCache) { 434 cacheData = createCacheEntry(new byte[]{}); 435 cache.put(getCacheKey(), cacheData, attributes); 436 } 437 return doCache; 438 } catch (IOException e) { 439 Logging.debug("JCS - IOException during communication with server for: {0}", getUrlNoException()); 440 if (isObjectLoadable()) { 441 return true; 442 } else { 443 attributes.setError(e); 444 attributes.setException(e); 445 attributes.setResponseCode(599); // set dummy error code, greater than 500 so it will be not cached 446 return false; 447 } 448 449 } catch (InterruptedException e) { 450 attributes.setError(e); 451 attributes.setException(e); 452 Logging.logWithStackTrace(Logging.LEVEL_WARN, e, "JCS - Exception during download {0}", getUrlNoException()); 453 Thread.currentThread().interrupt(); 454 } 455 Logging.warn("JCS - Silent failure during download: {0}", getUrlNoException()); 456 return false; 457 } 458 459 /** 460 * Tries do detect an error message from given string. 461 * @param data string to analyze 462 * @return error message if detected, or null 463 * @since 14535 464 */ 465 public String detectErrorMessage(String data) { 466 Matcher m = HttpClient.getTomcatErrorMatcher(data); 467 return m.matches() ? m.group(1).replace("'", "''") : null; 468 } 469 470 /** 471 * Check if the object is loadable. This means, if the data will be parsed, and if this response 472 * will finish as successful retrieve. 473 * 474 * This simple implementation doesn't load empty response, nor client (4xx) and server (5xx) errors 475 * 476 * @param headerFields headers sent by server 477 * @param responseCode http status code 478 * @param raw data read from server 479 * @return true if object should be cached and returned to listener 480 */ 481 protected boolean isResponseLoadable(Map<String, List<String>> headerFields, int responseCode, byte[] raw) { 482 return raw != null && raw.length != 0 && responseCode < 400; 483 } 484 485 protected abstract V createCacheEntry(byte[] content); 486 487 protected CacheEntryAttributes parseHeaders(HttpClient.Response urlConn) { 488 CacheEntryAttributes ret = new CacheEntryAttributes(); 489 490 /* 491 * according to https://www.ietf.org/rfc/rfc2616.txt Cache-Control takes precedence over max-age 492 * max-age is for private caches, s-max-age is for shared caches. We take any value that is larger 493 */ 494 Long expiration = 0L; 495 String cacheControl = urlConn.getHeaderField("Cache-Control"); 496 if (cacheControl != null) { 497 for (String token: cacheControl.split(",", -1)) { 498 try { 499 if (token.startsWith("max-age=")) { 500 expiration = Math.max(expiration, 501 TimeUnit.SECONDS.toMillis(Long.parseLong(token.substring("max-age=".length()))) 502 + System.currentTimeMillis() 503 ); 504 } 505 if (token.startsWith("s-max-age=")) { 506 expiration = Math.max(expiration, 507 TimeUnit.SECONDS.toMillis(Long.parseLong(token.substring("s-max-age=".length()))) 508 + System.currentTimeMillis() 509 ); 510 } 511 } catch (NumberFormatException e) { 512 // ignore malformed Cache-Control headers 513 Logging.trace(e); 514 } 515 } 516 } 517 518 if (expiration.equals(0L)) { 519 expiration = urlConn.getExpiration(); 520 } 521 522 // if nothing is found - set default 523 if (expiration.equals(0L)) { 524 expiration = System.currentTimeMillis() + DEFAULT_EXPIRE_TIME; 525 } 526 527 ret.setExpirationTime(Math.max(minimumExpiryTime + System.currentTimeMillis(), expiration)); 528 ret.setLastModification(now); 529 ret.setEtag(urlConn.getHeaderField("ETag")); 530 531 return ret; 532 } 533 534 private HttpClient getRequest(String requestMethod) throws IOException { 535 final HttpClient urlConn = HttpClient.create(getUrl(), requestMethod); 536 urlConn.setAccept("text/html, image/png, image/jpeg, image/gif, */*"); 537 urlConn.setReadTimeout(readTimeout); // 30 seconds read timeout 538 urlConn.setConnectTimeout(connectTimeout); 539 if (headers != null) { 540 urlConn.setHeaders(headers); 541 } 542 543 final boolean noCache = force 544 // To remove when switching to Java 11 545 // Workaround for https://bugs.openjdk.java.net/browse/JDK-8146450 546 || (Utils.getJavaVersion() == 8 && Utils.isRunningJavaWebStart()); 547 urlConn.useCache(!noCache); 548 549 return urlConn; 550 } 551 552 private boolean isCacheValidUsingHead() throws IOException { 553 final HttpClient.Response urlConn = getRequest("HEAD").connect(); 554 long lastModified = urlConn.getLastModified(); 555 boolean ret = (attributes.getEtag() != null && attributes.getEtag().equals(urlConn.getHeaderField("ETag"))) || 556 (lastModified != 0 && lastModified <= attributes.getLastModification()); 557 if (ret) { 558 // update attributes 559 attributes = parseHeaders(urlConn); 560 cache.put(getCacheKey(), cacheData, attributes); 561 } 562 return ret; 563 } 564 565 /** 566 * TODO: move to JobFactory 567 * cancels all outstanding tasks in the queue. 568 */ 569 public void cancelOutstandingTasks() { 570 for (Runnable r: downloadJobExecutor.getQueue()) { 571 if (downloadJobExecutor.remove(r) && r instanceof JCSCachedTileLoaderJob) { 572 ((JCSCachedTileLoaderJob<?, ?>) r).handleJobCancellation(); 573 } 574 } 575 } 576 577 /** 578 * Sets a job, that will be run, when job will finish execution 579 * @param runnable that will be executed 580 */ 581 public void setFinishedTask(Runnable runnable) { 582 this.finishTask = runnable; 583 584 } 585 586 /** 587 * Marks this job as canceled 588 */ 589 public void handleJobCancellation() { 590 finishLoading(LoadResult.CANCELED); 591 } 592 593 private URL getUrlNoException() { 594 try { 595 return getUrl(); 596 } catch (IOException e) { 597 Logging.trace(e); 598 return null; 599 } 600 } 601}