001// License: GPL. For details, see LICENSE file.
002package org.openstreetmap.josm.tools.template_engine;
003
004import java.util.Arrays;
005import java.util.HashSet;
006import java.util.Set;
007
008/**
009 * This class converts a template string (stream of characters) into a stream of tokens.
010 *
011 * The result of the tokenization (also called lexical analysis) serves as input for the
012 * parser {@link TemplateParser}.
013 */
014public class Tokenizer {
015
016    public static class Token {
017        private final TokenType type;
018        private final int position;
019        private final String text;
020
021        public Token(TokenType type, int position) {
022            this(type, position, null);
023        }
024
025        public Token(TokenType type, int position, String text) {
026            this.type = type;
027            this.position = position;
028            this.text = text;
029        }
030
031        public TokenType getType() {
032            return type;
033        }
034
035        public int getPosition() {
036            return position;
037        }
038
039        public String getText() {
040            return text;
041        }
042
043        @Override
044        public String toString() {
045            return type + (text != null ? ' ' + text : "");
046        }
047    }
048
049    public enum TokenType { CONDITION_START, VARIABLE_START, CONTEXT_SWITCH_START, END, PIPE, APOSTROPHE, TEXT, EOF }
050
051    private final Set<Character> specialCharacters = new HashSet<>(Arrays.asList('$', '?', '{', '}', '|', '\'', '!'));
052
053    private final String template;
054
055    private int c;
056    private int index;
057    private Token currentToken;
058    private final StringBuilder text = new StringBuilder();
059
060    /**
061     * Creates a new {@link Tokenizer}
062     * @param template the template as a user input string
063     */
064    public Tokenizer(String template) {
065        this.template = template;
066        getChar();
067    }
068
069    private void getChar() {
070        if (index >= template.length()) {
071            c = -1;
072        } else {
073            c = template.charAt(index++);
074        }
075    }
076
077    public Token nextToken() throws ParseError {
078        if (currentToken != null) {
079            Token result = currentToken;
080            currentToken = null;
081            return result;
082        }
083        int position = index;
084
085        text.setLength(0);
086        switch (c) {
087        case -1:
088            return new Token(TokenType.EOF, position);
089        case '{':
090            getChar();
091            return new Token(TokenType.VARIABLE_START, position);
092        case '?':
093            getChar();
094            if (c == '{') {
095                getChar();
096                return new Token(TokenType.CONDITION_START, position);
097            } else
098                throw ParseError.unexpectedChar('{', (char) c, position);
099        case '!':
100            getChar();
101            if (c == '{') {
102                getChar();
103                return new Token(TokenType.CONTEXT_SWITCH_START, position);
104            } else
105                throw ParseError.unexpectedChar('{', (char) c, position);
106        case '}':
107            getChar();
108            return new Token(TokenType.END, position);
109        case '|':
110            getChar();
111            return new Token(TokenType.PIPE, position);
112        case '\'':
113            getChar();
114            return new Token(TokenType.APOSTROPHE, position);
115        default:
116            while (c != -1 && !specialCharacters.contains((char) c)) {
117                if (c == '\\') {
118                    getChar();
119                    if (c == 'n') {
120                        c = '\n';
121                    }
122                }
123                text.append((char) c);
124                getChar();
125            }
126            return new Token(TokenType.TEXT, position, text.toString());
127        }
128    }
129
130    public Token lookAhead() throws ParseError {
131        if (currentToken == null) {
132            currentToken = nextToken();
133        }
134        return currentToken;
135    }
136
137    public Token skip(char lastChar) {
138        currentToken = null;
139        int position = index;
140        StringBuilder result = new StringBuilder();
141        while (c != lastChar && c != -1) {
142            if (c == '\\') {
143                getChar();
144            }
145            result.append((char) c);
146            getChar();
147        }
148        return new Token(TokenType.TEXT, position, result.toString());
149    }
150}