001// License: GPL. For details, see LICENSE file.
002package org.openstreetmap.josm.data.osm.search;
003
004import static org.openstreetmap.josm.tools.I18n.marktr;
005import static org.openstreetmap.josm.tools.I18n.tr;
006
007import java.io.IOException;
008import java.io.Reader;
009import java.util.Arrays;
010import java.util.List;
011import java.util.Objects;
012
013import org.openstreetmap.josm.tools.JosmRuntimeException;
014
015/**
016 * This class is used to parse a search string and split it into tokens.
017 * It provides methods to parse numbers and extract strings.
018 * @since 12656 (moved from actions.search package)
019 */
020public class PushbackTokenizer {
021
022    /**
023     * A range of long numbers. Immutable
024     */
025    public static class Range {
026        private final long start;
027        private final long end;
028
029        /**
030         * Create a new range
031         * @param start The start
032         * @param end The end (inclusive)
033         */
034        public Range(long start, long end) {
035            this.start = start;
036            this.end = end;
037        }
038
039        /**
040         * Returns the range start.
041         * @return The start
042         */
043        public long getStart() {
044            return start;
045        }
046
047        /**
048         * Returns the range end.
049         * @return The end (inclusive)
050         */
051        public long getEnd() {
052            return end;
053        }
054
055        @Override
056        public String toString() {
057            return "Range [start=" + start + ", end=" + end + ']';
058        }
059    }
060
061    private final Reader search;
062
063    private Token currentToken;
064    private String currentText;
065    private Long currentNumber;
066    private Long currentRange;
067    private int c;
068    private boolean isRange;
069
070    /**
071     * Creates a new {@link PushbackTokenizer}
072     * @param search The search string reader to read the tokens from
073     */
074    public PushbackTokenizer(Reader search) {
075        this.search = search;
076        getChar();
077    }
078
079    /**
080     * The token types that may be read
081     */
082    public enum Token {
083        /**
084         * Not token (-)
085         */
086        NOT(marktr("<not>")),
087        /**
088         * Or token (or) (|)
089         */
090        OR(marktr("<or>")),
091        /**
092         * Xor token (xor) (^)
093         */
094        XOR(marktr("<xor>")),
095        /**
096         * opening parentheses token (
097         */
098        LEFT_PARENT(marktr("<left parent>")),
099        /**
100         * closing parentheses token )
101         */
102        RIGHT_PARENT(marktr("<right parent>")),
103        /**
104         * Colon :
105         */
106        COLON(marktr("<colon>")),
107        /**
108         * The equals sign (=)
109         */
110        EQUALS(marktr("<equals>")),
111        /**
112         * The tilde sign (~)
113         */
114        TILDE(marktr("<tilde>")),
115        /**
116         * A text
117         */
118        KEY(marktr("<key>")),
119        /**
120         * A question mark (?)
121         */
122        QUESTION_MARK(marktr("<question mark>")),
123        /**
124         * Marks the end of the input
125         */
126        EOF(marktr("<end-of-file>")),
127        /**
128         * Less than sign (&lt;)
129         */
130        LESS_THAN("<less-than>"),
131        /**
132         * Greater than sign (&gt;)
133         */
134        GREATER_THAN("<greater-than>");
135
136        Token(String name) {
137            this.name = name;
138        }
139
140        private final String name;
141
142        @Override
143        public String toString() {
144            return tr(name);
145        }
146    }
147
148    private void getChar() {
149        try {
150            c = search.read();
151        } catch (IOException e) {
152            throw new JosmRuntimeException(e.getMessage(), e);
153        }
154    }
155
156    private static final List<Character> SPECIAL_CHARS = Arrays.asList('"', ':', '(', ')', '|', '^', '=', '~', '?', '<', '>');
157    private static final List<Character> SPECIAL_CHARS_QUOTED = Arrays.asList('"');
158
159    private String getString(boolean quoted) {
160        List<Character> sChars = quoted ? SPECIAL_CHARS_QUOTED : SPECIAL_CHARS;
161        StringBuilder s = new StringBuilder();
162        boolean escape = false;
163        while (c != -1 && (escape || (!sChars.contains((char) c) && (quoted || !Character.isWhitespace(c))))) {
164            if (c == '\\' && !escape) {
165                escape = true;
166            } else {
167                s.append((char) c);
168                escape = false;
169            }
170            getChar();
171        }
172        return s.toString();
173    }
174
175    private String getString() {
176        return getString(false);
177    }
178
179    /**
180     * The token returned is <code>null</code> or starts with an identifier character:
181     * - for an '-'. This will be the only character
182     * : for an key. The value is the next token
183     * | for "OR"
184     * ^ for "XOR"
185     * ' ' for anything else.
186     * @return The next token in the stream.
187     */
188    public Token nextToken() {
189        if (currentToken != null) {
190            Token result = currentToken;
191            currentToken = null;
192            return result;
193        }
194
195        while (Character.isWhitespace(c)) {
196            getChar();
197        }
198        switch (c) {
199        case -1:
200            getChar();
201            return Token.EOF;
202        case ':':
203            getChar();
204            return Token.COLON;
205        case '=':
206            getChar();
207            return Token.EQUALS;
208        case '~':
209            getChar();
210            return Token.TILDE;
211        case '<':
212            getChar();
213            return Token.LESS_THAN;
214        case '>':
215            getChar();
216            return Token.GREATER_THAN;
217        case '(':
218            getChar();
219            return Token.LEFT_PARENT;
220        case ')':
221            getChar();
222            return Token.RIGHT_PARENT;
223        case '|':
224            getChar();
225            return Token.OR;
226        case '^':
227            getChar();
228            return Token.XOR;
229        case '&':
230            getChar();
231            return nextToken();
232        case '?':
233            getChar();
234            return Token.QUESTION_MARK;
235        case '"':
236            getChar();
237            currentText = getString(true);
238            getChar();
239            return Token.KEY;
240        default:
241            String prefix = "";
242            if (c == '-') {
243                getChar();
244                if (!Character.isDigit(c))
245                    return Token.NOT;
246                prefix = "-";
247            }
248            currentText = prefix + getString();
249            if ("or".equalsIgnoreCase(currentText))
250                return Token.OR;
251            else if ("xor".equalsIgnoreCase(currentText))
252                return Token.XOR;
253            else if ("and".equalsIgnoreCase(currentText))
254                return nextToken();
255            // try parsing number
256            try {
257                currentNumber = Long.valueOf(currentText);
258            } catch (NumberFormatException e) {
259                currentNumber = null;
260            }
261            // if text contains "-", try parsing a range
262            int pos = currentText.indexOf('-', 1);
263            isRange = pos > 0;
264            if (isRange) {
265                try {
266                    currentNumber = Long.valueOf(currentText.substring(0, pos));
267                } catch (NumberFormatException e) {
268                    currentNumber = null;
269                }
270                try {
271                    currentRange = Long.valueOf(currentText.substring(pos + 1));
272                } catch (NumberFormatException e) {
273                    currentRange = null;
274                    }
275                } else {
276                    currentRange = null;
277                }
278            return Token.KEY;
279        }
280    }
281
282    /**
283     * Reads the next token if it is equal to the given, suggested token
284     * @param token The token the next one should be equal to
285     * @return <code>true</code> if it has been read
286     */
287    public boolean readIfEqual(Token token) {
288        Token nextTok = nextToken();
289        if (Objects.equals(nextTok, token))
290            return true;
291        currentToken = nextTok;
292        return false;
293    }
294
295    /**
296     * Reads the next token. If it is a text, return that text. If not, advance
297     * @return the text or <code>null</code> if the reader was advanced
298     */
299    public String readTextOrNumber() {
300        Token nextTok = nextToken();
301        if (nextTok == Token.KEY)
302            return currentText;
303        currentToken = nextTok;
304        return null;
305    }
306
307    /**
308     * Reads a number
309     * @param errorMessage The error if the number cannot be read
310     * @return The number that was found
311     * @throws SearchParseError if there is no number
312     */
313    public long readNumber(String errorMessage) throws SearchParseError {
314        if ((nextToken() == Token.KEY) && (currentNumber != null))
315            return currentNumber;
316        else
317            throw new SearchParseError(errorMessage);
318    }
319
320    /**
321     * Gets the last number that was read
322     * @return The last number
323     */
324    public long getReadNumber() {
325        return (currentNumber != null) ? currentNumber : 0;
326    }
327
328    /**
329     * Reads a range of numbers
330     * @param errorMessage The error if the input is malformed
331     * @return The range that was found
332     * @throws SearchParseError If the input is not as expected for a range
333     */
334    public Range readRange(String errorMessage) throws SearchParseError {
335        if (nextToken() != Token.KEY || (currentNumber == null && currentRange == null)) {
336            throw new SearchParseError(errorMessage);
337        } else if (!isRange && currentNumber != null) {
338            if (currentNumber >= 0) {
339                return new Range(currentNumber, currentNumber);
340            } else {
341                return new Range(0, Math.abs(currentNumber));
342            }
343        } else if (isRange && currentRange == null) {
344            return new Range(currentNumber, Long.MAX_VALUE);
345        } else if (currentNumber != null && currentRange != null) {
346            return new Range(currentNumber, currentRange);
347        } else {
348            throw new SearchParseError(errorMessage);
349        }
350    }
351
352    /**
353     * Gets the last text that was found
354     * @return The text
355     */
356    public String getText() {
357        return currentText;
358    }
359}