001// License: GPL. For details, see LICENSE file.
002package org.openstreetmap.josm.tools;
003
004import java.io.IOException;
005import java.io.InputStream;
006import java.net.URL;
007import java.util.List;
008import java.util.Optional;
009import java.util.function.BiConsumer;
010import java.util.stream.Collectors;
011
012import javax.xml.parsers.ParserConfigurationException;
013import javax.xml.xpath.XPath;
014import javax.xml.xpath.XPathConstants;
015import javax.xml.xpath.XPathExpressionException;
016import javax.xml.xpath.XPathFactory;
017
018import org.openstreetmap.josm.data.Bounds;
019import org.openstreetmap.josm.data.coor.LatLon;
020import org.w3c.dom.Document;
021import org.w3c.dom.NamedNodeMap;
022import org.w3c.dom.Node;
023import org.w3c.dom.NodeList;
024import org.xml.sax.SAXException;
025
026/**
027 * Interaction with Mediawiki instances, such as the OSM wiki.
028 * @since 14641
029 */
030public class Mediawiki {
031
032    private final String baseUrl;
033
034    /**
035     * Constructs a new {@code Mediawiki} for the given base URL.
036     * @param baseUrl The wiki base URL
037     */
038    public Mediawiki(String baseUrl) {
039        this.baseUrl = baseUrl;
040    }
041
042    /**
043     * Determines which page exists on the Mediawiki instance.
044     * @param pages the pages to check
045     * @return the first existing page
046     * @throws IOException if any I/O error occurs
047     * @throws ParserConfigurationException if a parser cannot be created
048     * @throws SAXException if any XML error occurs
049     * @throws XPathExpressionException if any error in an XPath expression occurs
050     */
051    public Optional<String> findExistingPage(List<String> pages)
052            throws IOException, ParserConfigurationException, SAXException, XPathExpressionException {
053        List<String> distinctPages = pages.stream().distinct().collect(Collectors.toList());
054        // find a page that actually exists in the wiki
055        // API documentation: https://wiki.openstreetmap.org/w/api.php?action=help&modules=query
056        final URL url = new URL(baseUrl + "/w/api.php?action=query&format=xml&titles=" + distinctPages.stream()
057                .map(Utils::encodeUrl)
058                .collect(Collectors.joining(Utils.encodeUrl("|")))
059        );
060        final Document document = getDocument(url);
061        final XPath xPath = XPathFactory.newInstance().newXPath();
062        for (String page : distinctPages) {
063            String normalized = xPath.evaluate("/api/query/normalized/n[@from='" + page + "']/@to", document);
064            if (Utils.isEmpty(normalized)) {
065                normalized = page;
066            }
067            final Node node = (Node) xPath.evaluate("/api/query/pages/page[@title='" + normalized + "']", document, XPathConstants.NODE);
068            if (node != null
069                    && node.getAttributes().getNamedItem("missing") == null
070                    && node.getAttributes().getNamedItem("invalid") == null) {
071                return Optional.of(page);
072            }
073        }
074        return Optional.empty();
075    }
076
077    private Document getDocument(URL url) throws IOException, ParserConfigurationException, SAXException {
078        final HttpClient.Response conn = HttpClient.create(url).connect();
079        try (InputStream content = conn.getContent()) {
080            return XmlUtils.parseSafeDOM(content);
081        } finally {
082            conn.disconnect();
083        }
084    }
085
086    /**
087     * Searches geocoded images from <a href="https://commons.wikimedia.org/">Wikimedia Commons</a> for the given bounding box.
088     * @param bounds the bounds to load
089     * @param imageConsumer a consumer to receive the file title and the coordinates for every geocoded image
090     * @throws IOException if any I/O error occurs
091     * @throws ParserConfigurationException if a parser cannot be created
092     * @throws SAXException if any XML error occurs
093     * @throws XPathExpressionException if any error in an XPath expression occurs
094     */
095    public void searchGeoImages(Bounds bounds, BiConsumer<String, LatLon> imageConsumer)
096            throws IOException, ParserConfigurationException, SAXException, XPathExpressionException {
097        final URL url = new URL(getGeoImagesUrl(baseUrl, bounds));
098        final Document document = getDocument(url);
099        final XPath xPath = XPathFactory.newInstance().newXPath();
100        NodeList nodes = (NodeList) xPath.evaluate("/api/query/geosearch/gs", document, XPathConstants.NODESET);
101        for (int i = 0; i < nodes.getLength(); i++) {
102            NamedNodeMap attributes = nodes.item(i).getAttributes();
103            String title = attributes.getNamedItem("title").getNodeValue();
104            double lat = Double.parseDouble(attributes.getNamedItem("lat").getNodeValue());
105            double lon = Double.parseDouble(attributes.getNamedItem("lon").getNodeValue());
106            imageConsumer.accept(title, new LatLon(lat, lon));
107        }
108    }
109
110    /**
111     * Returns the URL for searching geolocated images in given bounds.
112     * @param baseUrl The wiki base URL
113     * @param bounds the bounds of the search area
114     * @return the URL for searching geolocated images in given bounds
115     * @since 18046
116     */
117    public static String getGeoImagesUrl(String baseUrl, Bounds bounds) {
118        String sep = Utils.encodeUrl("|");
119        return baseUrl +
120                "?format=xml" +
121                "&action=query" +
122                "&list=geosearch" +
123                "&gsnamespace=6" +
124                "&gslimit=500" +
125                "&gsprop=type" + sep + "name" +
126                "&gsbbox=" + bounds.getMaxLat() + sep + bounds.getMinLon() + sep + bounds.getMinLat() + sep + bounds.getMaxLon();
127    }
128
129    /**
130     * Computes the URL for the given filename on the MediaWiki server
131     * @param fileBaseUrl the base URL of the file MediaWiki storage, such as {@code "https://upload.wikimedia.org/wikipedia/commons/"}
132     * @param filename    the filename
133     * @return the URL for the given filename on the MediaWiki server
134     * @see <a href="https://www.mediawiki.org/wiki/Manual:$wgHashedUploadDirectory">MediaWiki $wgHashedUploadDirectory</a>
135     */
136    public static String getImageUrl(String fileBaseUrl, String filename) {
137        final String md5 = Utils.md5Hex(filename);
138        return String.join("/", Utils.strip(fileBaseUrl, "/"), md5.substring(0, 1), md5.substring(0, 2), filename);
139    }
140}