001// License: GPL. For details, see LICENSE file. 002package org.openstreetmap.josm.tools; 003 004import java.io.IOException; 005import java.io.InputStream; 006import java.net.URL; 007import java.util.List; 008import java.util.Optional; 009import java.util.function.BiConsumer; 010import java.util.stream.Collectors; 011 012import javax.xml.parsers.ParserConfigurationException; 013import javax.xml.xpath.XPath; 014import javax.xml.xpath.XPathConstants; 015import javax.xml.xpath.XPathExpressionException; 016import javax.xml.xpath.XPathFactory; 017 018import org.openstreetmap.josm.data.Bounds; 019import org.openstreetmap.josm.data.coor.LatLon; 020import org.w3c.dom.Document; 021import org.w3c.dom.NamedNodeMap; 022import org.w3c.dom.Node; 023import org.w3c.dom.NodeList; 024import org.xml.sax.SAXException; 025 026/** 027 * Interaction with Mediawiki instances, such as the OSM wiki. 028 * @since 14641 029 */ 030public class Mediawiki { 031 032 private final String baseUrl; 033 034 /** 035 * Constructs a new {@code Mediawiki} for the given base URL. 036 * @param baseUrl The wiki base URL 037 */ 038 public Mediawiki(String baseUrl) { 039 this.baseUrl = baseUrl; 040 } 041 042 /** 043 * Determines which page exists on the Mediawiki instance. 044 * @param pages the pages to check 045 * @return the first existing page 046 * @throws IOException if any I/O error occurs 047 * @throws ParserConfigurationException if a parser cannot be created 048 * @throws SAXException if any XML error occurs 049 * @throws XPathExpressionException if any error in an XPath expression occurs 050 */ 051 public Optional<String> findExistingPage(List<String> pages) 052 throws IOException, ParserConfigurationException, SAXException, XPathExpressionException { 053 List<String> distinctPages = pages.stream().distinct().collect(Collectors.toList()); 054 // find a page that actually exists in the wiki 055 // API documentation: https://wiki.openstreetmap.org/w/api.php?action=help&modules=query 056 final URL url = new URL(baseUrl + "/w/api.php?action=query&format=xml&titles=" + distinctPages.stream() 057 .map(Utils::encodeUrl) 058 .collect(Collectors.joining(Utils.encodeUrl("|"))) 059 ); 060 final Document document = getDocument(url); 061 final XPath xPath = XPathFactory.newInstance().newXPath(); 062 for (String page : distinctPages) { 063 String normalized = xPath.evaluate("/api/query/normalized/n[@from='" + page + "']/@to", document); 064 if (Utils.isEmpty(normalized)) { 065 normalized = page; 066 } 067 final Node node = (Node) xPath.evaluate("/api/query/pages/page[@title='" + normalized + "']", document, XPathConstants.NODE); 068 if (node != null 069 && node.getAttributes().getNamedItem("missing") == null 070 && node.getAttributes().getNamedItem("invalid") == null) { 071 return Optional.of(page); 072 } 073 } 074 return Optional.empty(); 075 } 076 077 private Document getDocument(URL url) throws IOException, ParserConfigurationException, SAXException { 078 final HttpClient.Response conn = HttpClient.create(url).connect(); 079 try (InputStream content = conn.getContent()) { 080 return XmlUtils.parseSafeDOM(content); 081 } finally { 082 conn.disconnect(); 083 } 084 } 085 086 /** 087 * Searches geocoded images from <a href="https://commons.wikimedia.org/">Wikimedia Commons</a> for the given bounding box. 088 * @param bounds the bounds to load 089 * @param imageConsumer a consumer to receive the file title and the coordinates for every geocoded image 090 * @throws IOException if any I/O error occurs 091 * @throws ParserConfigurationException if a parser cannot be created 092 * @throws SAXException if any XML error occurs 093 * @throws XPathExpressionException if any error in an XPath expression occurs 094 */ 095 public void searchGeoImages(Bounds bounds, BiConsumer<String, LatLon> imageConsumer) 096 throws IOException, ParserConfigurationException, SAXException, XPathExpressionException { 097 final URL url = new URL(getGeoImagesUrl(baseUrl, bounds)); 098 final Document document = getDocument(url); 099 final XPath xPath = XPathFactory.newInstance().newXPath(); 100 NodeList nodes = (NodeList) xPath.evaluate("/api/query/geosearch/gs", document, XPathConstants.NODESET); 101 for (int i = 0; i < nodes.getLength(); i++) { 102 NamedNodeMap attributes = nodes.item(i).getAttributes(); 103 String title = attributes.getNamedItem("title").getNodeValue(); 104 double lat = Double.parseDouble(attributes.getNamedItem("lat").getNodeValue()); 105 double lon = Double.parseDouble(attributes.getNamedItem("lon").getNodeValue()); 106 imageConsumer.accept(title, new LatLon(lat, lon)); 107 } 108 } 109 110 /** 111 * Returns the URL for searching geolocated images in given bounds. 112 * @param baseUrl The wiki base URL 113 * @param bounds the bounds of the search area 114 * @return the URL for searching geolocated images in given bounds 115 * @since 18046 116 */ 117 public static String getGeoImagesUrl(String baseUrl, Bounds bounds) { 118 String sep = Utils.encodeUrl("|"); 119 return baseUrl + 120 "?format=xml" + 121 "&action=query" + 122 "&list=geosearch" + 123 "&gsnamespace=6" + 124 "&gslimit=500" + 125 "&gsprop=type" + sep + "name" + 126 "&gsbbox=" + bounds.getMaxLat() + sep + bounds.getMinLon() + sep + bounds.getMinLat() + sep + bounds.getMaxLon(); 127 } 128 129 /** 130 * Computes the URL for the given filename on the MediaWiki server 131 * @param fileBaseUrl the base URL of the file MediaWiki storage, such as {@code "https://upload.wikimedia.org/wikipedia/commons/"} 132 * @param filename the filename 133 * @return the URL for the given filename on the MediaWiki server 134 * @see <a href="https://www.mediawiki.org/wiki/Manual:$wgHashedUploadDirectory">MediaWiki $wgHashedUploadDirectory</a> 135 */ 136 public static String getImageUrl(String fileBaseUrl, String filename) { 137 final String md5 = Utils.md5Hex(filename); 138 return String.join("/", Utils.strip(fileBaseUrl, "/"), md5.substring(0, 1), md5.substring(0, 2), filename); 139 } 140}