001// License: GPL. For details, see LICENSE file.
002package org.openstreetmap.josm.tools;
003
004import static org.openstreetmap.josm.tools.I18n.tr;
005import static org.openstreetmap.josm.tools.I18n.trn;
006
007import java.util.LinkedHashMap;
008import java.util.Map;
009import java.util.Map.Entry;
010import java.util.regex.Matcher;
011import java.util.regex.Pattern;
012
013import org.openstreetmap.josm.spi.preferences.Config;
014
015/**
016 * Class that helps to parse tags from arbitrary text
017 */
018public final class TextTagParser {
019
020    // properties need JOSM restart to apply, modified rarely enough
021    private static final int MAX_KEY_LENGTH = Config.getPref().getInt("tags.paste.max-key-length", 50);
022    private static final int MAX_KEY_COUNT = Config.getPref().getInt("tags.paste.max-key-count", 30);
023    private static final String KEY_PATTERN = Config.getPref().get("tags.paste.tag-pattern", "[0-9a-zA-Z:_]*");
024    private static final int MAX_VALUE_LENGTH = 255;
025
026    private TextTagParser() {
027        // Hide default constructor for utils classes
028    }
029
030    static String unescape(String k) {
031        if (!(k.startsWith("\"") && k.endsWith("\""))) {
032            if (k.contains("=")) {
033                // '=' not in quotes will be treated as an error!
034                return null;
035            } else {
036                return k;
037            }
038        }
039        String text = k.substring(1, k.length()-1);
040        return new TextAnalyzer(text).parseString("\r\t\n");
041    }
042
043    /**
044     * Try to find tag-value pairs in given text
045     * @param text - text in which tags are looked for
046     * @param splitRegex - text is split into parts with this delimiter
047     * @param tagRegex - each part is matched against this regex
048     * @param unescapeTextInQuotes - if true, matched tag and value will be analyzed more thoroughly
049     * @return map of tags
050     */
051    public static Map<String, String> readTagsByRegexp(String text, String splitRegex, String tagRegex, boolean unescapeTextInQuotes) {
052         String[] lines = text.split(splitRegex, -1);
053         Pattern p = Pattern.compile(tagRegex);
054         Map<String, String> tags = new LinkedHashMap<>();
055         String k;
056         String v;
057         for (String line: lines) {
058            if (line.trim().isEmpty()) continue; // skip empty lines
059            Matcher m = p.matcher(line);
060            if (m.matches()) {
061                 k = Utils.removeWhiteSpaces(m.group(1));
062                 v = Utils.removeWhiteSpaces(m.group(2));
063                 if (unescapeTextInQuotes) {
064                     k = unescape(k);
065                     v = unescape(v);
066                     if (k == null || v == null) return null;
067                 }
068                 tags.put(k, v);
069            } else {
070                return null;
071            }
072         }
073         if (!tags.isEmpty()) {
074            return tags;
075         } else {
076            return null;
077         }
078    }
079
080    /**
081     * Gets a list of tags that are in the given text
082     * @param buf The text to parse
083     * @param callback warning callback
084     * @return The tags or <code>null</code> if the tags are not valid
085     * @since 12683
086     */
087    public static Map<String, String> getValidatedTagsFromText(String buf, TagWarningCallback callback) {
088        Map<String, String> tags = readTagsFromText(buf);
089        return validateTags(tags, callback) ? tags : null;
090    }
091
092    /**
093     * Apply different methods to extract tag-value pairs from arbitrary text
094     * @param buf buffer
095     * @return null if no format is suitable
096     */
097    public static Map<String, String> readTagsFromText(String buf) {
098        Map<String, String> tags;
099
100        // Format
101        // tag1\tval1\ntag2\tval2\n
102        tags = readTagsByRegexp(buf, "[\\r\\n]+", ".*?([a-zA-Z0-9:_]+).*\\t(.*?)", false);
103        // try "tag\tvalue\n" format
104        if (tags != null) return tags;
105
106        // Format
107        // a=b \n c=d \n "a b"=hello
108        // SORRY: "a=b" = c is not supported for now, only first = will be considered
109        // a = "b=c" is OK
110        // a = b=c  - this method of parsing fails intentionally
111        tags = readTagsByRegexp(buf, "[\\n\\t\\r]+", "(.*?)=(.*?)", true);
112        // try format  t1=v1\n t2=v2\n ...
113        if (tags != null) return tags;
114
115        // JSON-format
116        String bufJson = buf.trim();
117        // trim { }, if there are any
118        if (bufJson.startsWith("{") && bufJson.endsWith("}"))
119            bufJson = bufJson.substring(1, bufJson.length()-1);
120        tags = readTagsByRegexp(bufJson, "[\\s]*,[\\s]*",
121                "[\\s]*(\\\".*?[^\\\\]\\\")"+"[\\s]*:[\\s]*"+"(\\\".*?[^\\\\]\\\")[\\s]*", true);
122        if (tags != null) return tags;
123
124        // Free format
125        // a 1 "b" 2 c=3 d 4 e "5"
126        return new TextAnalyzer(buf).getFreeParsedTags();
127    }
128
129    /**
130     * Check tags for correctness and display warnings if needed
131     * @param tags - map key-&gt;value to check
132     * @param callback warning callback
133     * @return true if the tags should be pasted
134     * @since 12683
135     */
136    public static boolean validateTags(Map<String, String> tags, TagWarningCallback callback) {
137        int r;
138        int s = tags.size();
139        if (s > MAX_KEY_COUNT) {
140            // Use trn() even if for english it makes no sense, as s > 30
141            r = callback.warning(trn("There was {0} tag found in the buffer, it is suspicious!",
142            "There were {0} tags found in the buffer, it is suspicious!", s,
143            s), "", "tags.paste.toomanytags");
144            if (r == 2 || r == 3) return false; if (r == 4) return true;
145        }
146        for (Entry<String, String> entry : tags.entrySet()) {
147            String key = entry.getKey();
148            String value = entry.getValue();
149            if (key.length() > MAX_KEY_LENGTH) {
150                r = callback.warning(tr("Key is too long (max {0} characters):", MAX_KEY_LENGTH), key+'='+value, "tags.paste.keytoolong");
151                if (r == 2 || r == 3) return false; if (r == 4) return true;
152            }
153            if (!key.matches(KEY_PATTERN)) {
154                r = callback.warning(tr("Suspicious characters in key:"), key, "tags.paste.keydoesnotmatch");
155                if (r == 2 || r == 3) return false; if (r == 4) return true;
156            }
157            if (value.length() > MAX_VALUE_LENGTH) {
158                r = callback.warning(tr("Value is too long (max {0} characters):", MAX_VALUE_LENGTH), value, "tags.paste.valuetoolong");
159                if (r == 2 || r == 3) return false; if (r == 4) return true;
160            }
161        }
162        return true;
163    }
164
165    /**
166     * Called when a problematic tag is encountered.
167     * @since 12683
168     */
169    @FunctionalInterface
170    public interface TagWarningCallback {
171        /**
172         * Displays a warning about a problematic tag and ask user what to do about it.
173         * @param text Message to display
174         * @param data Tag key and/or value
175         * @param code to use with {@code ExtendedDialog#toggleEnable(String)}
176         * @return 1 to validate and display next warnings if any, 2 to cancel operation, 3 to clear buffer, 4 to paste tags
177         */
178        int warning(String text, String data, String code);
179    }
180}