001// License: GPL. For details, see LICENSE file.
002package org.openstreetmap.josm.tools;
003
004import static org.openstreetmap.josm.tools.I18n.tr;
005import static org.openstreetmap.josm.tools.I18n.trn;
006
007import java.io.IOException;
008import java.io.InputStream;
009import java.net.MalformedURLException;
010import java.net.URL;
011import java.util.Arrays;
012import java.util.Collections;
013import java.util.HashMap;
014import java.util.List;
015import java.util.Map;
016import java.util.Objects;
017import java.util.Optional;
018import java.util.function.Supplier;
019import java.util.function.UnaryOperator;
020import java.util.regex.Matcher;
021import java.util.regex.Pattern;
022import java.util.stream.Collectors;
023
024import javax.json.Json;
025import javax.json.JsonArray;
026import javax.json.JsonReader;
027import javax.json.JsonValue;
028
029import org.openstreetmap.josm.data.osm.OsmPrimitiveType;
030import org.openstreetmap.josm.data.osm.OsmUtils;
031import org.openstreetmap.josm.data.preferences.CachingProperty;
032import org.openstreetmap.josm.data.preferences.ListProperty;
033import org.openstreetmap.josm.io.CachedFile;
034
035/**
036 * Extracts web links from OSM tags.
037 *
038 * The following rules are used:
039 * <ul>
040 * <li>internal rules for basic tags</li>
041 * <li>rules from Wikidata based on OSM tag or key (P1282); formatter URL (P1630); third-party formatter URL (P3303)</li>
042 * <li>rules from OSM Sophox based on permanent key ID (P16); formatter URL (P8)</li>
043 * </ul>
044 *
045 * @since 15673
046 */
047public final class Tag2Link {
048
049    // Related implementations:
050    // - https://github.com/openstreetmap/openstreetmap-website/blob/master/app/helpers/browse_tags_helper.rb
051
052    /**
053     * Maps OSM keys to formatter URLs from Wikidata and OSM Sophox where {@code "$1"} has to be replaced by a value.
054     */
055    static final MultiMap<String, String> wikidataRules = new MultiMap<>();
056
057    static final Map<String, UnaryOperator<String>> valueFormatter = Collections.singletonMap(
058            "ref:bag", v -> String.format("%16s", v).replace(' ', '0')
059    );
060
061    static final String languagePattern = LanguageInfo.getLanguageCodes(null).stream()
062            .map(Pattern::quote)
063            .collect(Collectors.joining("|"));
064
065    static final ListProperty PREF_SOURCE = new ListProperty("tag2link.source",
066            Collections.singletonList("resource://META-INF/resources/webjars/tag2link/2021.3.21/index.json"));
067
068    static final CachingProperty<List<String>> PREF_SEARCH_ENGINES = new ListProperty("tag2link.search",
069            Arrays.asList("https://duckduckgo.com/?q=$1", "https://www.google.com/search?q=$1")).cached();
070
071    private Tag2Link() {
072        // private constructor for utility class
073    }
074
075    /**
076     * Represents an operation that accepts a link.
077     */
078    @FunctionalInterface
079    public interface LinkConsumer {
080        /**
081         * Performs the operation on the given arguments.
082         * @param name the name/label of the link
083         * @param url the URL of the link
084         * @param icon the icon to use
085         */
086        void acceptLink(String name, String url, ImageResource icon);
087    }
088
089    /**
090     * Initializes the tag2link rules
091     */
092    public static void initialize() {
093        try {
094            wikidataRules.clear();
095            for (String source : PREF_SOURCE.get()) {
096                initializeFromResources(new CachedFile(source));
097            }
098        } catch (Exception e) {
099            Logging.error("Failed to initialize tag2link rules");
100            Logging.error(e);
101        }
102    }
103
104    /**
105     * Initializes the tag2link rules from the resources.
106     *
107     * @param resource the source
108     * @throws IOException in case of I/O error
109     */
110    private static void initializeFromResources(CachedFile resource) throws IOException {
111        final JsonArray rules;
112        try (InputStream inputStream = resource.getInputStream();
113             JsonReader jsonReader = Json.createReader(inputStream)) {
114            rules = jsonReader.readArray();
115        }
116
117        for (JsonValue rule : rules) {
118            final String key = rule.asJsonObject().getString("key");
119            final String url = rule.asJsonObject().getString("url");
120            if (key.startsWith("Key:")) {
121                wikidataRules.put(key.substring("Key:".length()), url);
122            }
123        }
124        // We handle those keys ourselves
125        wikidataRules.keySet().removeIf(key -> key.matches("^(.+[:_])?website([:_].+)?$")
126                || key.matches("^(.+[:_])?url([:_].+)?$")
127                || key.matches("wikimedia_commons|image")
128                || key.matches("wikipedia(:(?<lang>\\p{Lower}{2,}))?")
129                || key.matches("(.*:)?wikidata"));
130
131        final int size = wikidataRules.size();
132        Logging.info(trn(
133                "Obtained {0} Tag2Link rule from {1}",
134                "Obtained {0} Tag2Link rules from {1}",
135                size, size, resource));
136    }
137
138    /**
139     * Generates the links for the tag given by {@code key} and {@code value}, and sends 0, 1 or more links to the {@code linkConsumer}.
140     * @param key the tag key
141     * @param value the tag value
142     * @param linkConsumer the receiver of the generated links
143     */
144    public static void getLinksForTag(String key, String value, LinkConsumer linkConsumer) {
145
146        if (Utils.isEmpty(value)) {
147            return;
148        }
149
150        final HashMap<OsmPrimitiveType, Optional<ImageResource>> memoize = new HashMap<>();
151        final Supplier<ImageResource> imageResource = () -> memoize
152                .computeIfAbsent(OsmPrimitiveType.NODE, type -> OsmPrimitiveImageProvider.getResource(key, value, type))
153                .orElse(null);
154
155        // Search
156        if (key.matches("^(.+[:_])?name([:_]" + languagePattern + ")?$")) {
157            final ImageResource search = new ImageProvider("dialogs/search").getResource();
158            PREF_SEARCH_ENGINES.get().forEach(url ->
159                    linkConsumer.acceptLink(tr("Search on {0}", getHost(url, url)), url.replace("$1", Utils.encodeUrl(value)), search));
160        }
161
162        // Common
163        final List<String> validURLs = value.startsWith("http:") || value.startsWith("https:") || value.startsWith("www.")
164                ? OsmUtils.splitMultipleValues(value)
165                .map(v -> v.startsWith("http:") || v.startsWith("https:")
166                        ? v
167                        : v.startsWith("www.")
168                        ? "http://" + v
169                        : null)
170                .filter(Objects::nonNull)
171                .collect(Collectors.toList())
172                : Collections.emptyList();
173        if (key.matches("^(.+[:_])?website([:_].+)?$") && !validURLs.isEmpty()) {
174            validURLs.forEach(validURL -> linkConsumer.acceptLink(getLinkName(validURL, key), validURL, imageResource.get()));
175        }
176        if (key.matches("^(.+[:_])?source([:_].+)?$") && !validURLs.isEmpty()) {
177            validURLs.forEach(validURL -> linkConsumer.acceptLink(getLinkName(validURL, key), validURL, imageResource.get()));
178        }
179        if (key.matches("^(.+[:_])?url([:_].+)?$") && !validURLs.isEmpty()) {
180            validURLs.forEach(validURL -> linkConsumer.acceptLink(getLinkName(validURL, key), validURL, imageResource.get()));
181        }
182        if (key.matches("image") && !validURLs.isEmpty()) {
183            validURLs.forEach(validURL -> linkConsumer.acceptLink(tr("View image"), validURL, imageResource.get()));
184        }
185
186        // Wikimedia
187        final Matcher keyMatcher = Pattern.compile("wikipedia(:(?<lang>\\p{Lower}{2,}))?").matcher(key);
188        final Matcher valueMatcher = Pattern.compile("((?<lang>\\p{Lower}{2,}):)?(?<article>.*)").matcher(value);
189        if (keyMatcher.matches() && valueMatcher.matches()) {
190            final String lang = Utils.firstNotEmptyString("en", keyMatcher.group("lang"), valueMatcher.group("lang"));
191            final String url = "https://" + lang + ".wikipedia.org/wiki/" + valueMatcher.group("article").replace(' ', '_');
192            linkConsumer.acceptLink(tr("View Wikipedia article"), url, imageResource.get());
193        }
194        if (key.matches("(.*:)?wikidata")) {
195            OsmUtils.splitMultipleValues(value).forEach(q -> linkConsumer.acceptLink(
196                    tr("View Wikidata item"), "https://www.wikidata.org/wiki/" + q, imageResource.get()));
197        }
198        if (key.matches("(.*:)?species")) {
199            final String url = "https://species.wikimedia.org/wiki/" + value;
200            linkConsumer.acceptLink(getLinkName(url, key), url, imageResource.get());
201        }
202        if (key.matches("wikimedia_commons|image") && value.matches("(?i:File):.*")) {
203            OsmUtils.splitMultipleValues(value).forEach(i -> linkConsumer.acceptLink(
204                    tr("View image on Wikimedia Commons"), getWikimediaCommonsUrl(i), imageResource.get()));
205        }
206        if (key.matches("wikimedia_commons|image") && value.matches("(?i:Category):.*")) {
207            OsmUtils.splitMultipleValues(value).forEach(i -> linkConsumer.acceptLink(
208                    tr("View category on Wikimedia Commons"), getWikimediaCommonsUrl(i), imageResource.get()));
209        }
210
211        wikidataRules.getValues(key).forEach(urlFormatter -> {
212            final String formattedValue = valueFormatter.getOrDefault(key, x -> x).apply(value);
213            final String url = urlFormatter.replace("$1", formattedValue);
214            linkConsumer.acceptLink(getLinkName(url, key), url, imageResource.get());
215        });
216    }
217
218    private static String getWikimediaCommonsUrl(String i) {
219        i = i.replace(' ', '_');
220        i = Utils.encodeUrl(i);
221        return "https://commons.wikimedia.org/wiki/" + i;
222    }
223
224    private static String getLinkName(String url, String fallback) {
225        return tr("Open {0}", getHost(url, fallback));
226    }
227
228    private static String getHost(String url, String fallback) {
229        try {
230            return new URL(url).getHost().replaceFirst("^www\\.", "");
231        } catch (MalformedURLException e) {
232            return fallback;
233        }
234    }
235
236}