001// License: GPL. For details, see LICENSE file. 002package org.openstreetmap.josm.tools; 003 004import java.util.Arrays; 005import java.util.HashMap; 006import java.util.Map; 007 008/** 009 * A helper class that analyzes the text and attempts to parse tags from it 010 * @since 13544 (extracted from {@link TextTagParser}) 011 */ 012public class TextAnalyzer { 013 private boolean quotesStarted; 014 private boolean esc; 015 private final StringBuilder s = new StringBuilder(200); 016 private String valueStops = "\n\r\t"; 017 private int pos; 018 private final String data; 019 private final int n; 020 021 /** 022 * Create a new {@link TextAnalyzer} 023 * @param text The text to parse 024 */ 025 public TextAnalyzer(String text) { 026 pos = 0; 027 data = Utils.strip(text); 028 n = data.length(); 029 // fix #1604: allow space characters as value stops for single-line input only 030 if (data.indexOf('\r') == -1 && data.indexOf('\n') == -1) { 031 valueStops += " "; 032 } 033 } 034 035 /** 036 * Read tags from "Free format" 037 * @return map of tags 038 */ 039 public Map<String, String> getFreeParsedTags() { 040 String k, v; 041 Map<String, String> tags = new HashMap<>(); 042 043 while (true) { 044 skipEmpty(); 045 if (pos == n) { 046 break; 047 } 048 k = parseString("\n\r\t= "); 049 if (pos == n) { 050 tags.clear(); 051 break; 052 } 053 skipSign(); 054 if (pos == n) { 055 tags.clear(); 056 break; 057 } 058 v = parseString(valueStops); 059 tags.put(k, v); 060 } 061 return tags; 062 } 063 064 /** 065 * Parses current text to extract a key or value depending on given stop characters. 066 * @param stopChars Parsing will stop when one character of this string is found 067 * @return key or value extracted from current text 068 */ 069 public String parseString(String stopChars) { 070 char[] stop = stopChars.toCharArray(); 071 Arrays.sort(stop); 072 char c; 073 while (pos < n) { 074 c = data.charAt(pos); 075 if (esc) { 076 esc = false; 077 s.append(c); // \" \\ 078 } else if (c == '\\') { 079 esc = true; 080 } else if (c == '\"' && !quotesStarted) { // opening " 081 if (!s.toString().trim().isEmpty()) { // we had ||some text"|| 082 s.append(c); // just add ", not open 083 } else { 084 s.delete(0, s.length()); // forget that empty characthers and start reading ".... 085 quotesStarted = true; 086 } 087 } else if (c == '\"' && quotesStarted) { // closing " 088 quotesStarted = false; 089 pos++; 090 break; 091 } else if (!quotesStarted && (Arrays.binarySearch(stop, c) >= 0)) { 092 // stop-symbol found 093 pos++; 094 break; 095 } else { 096 // skip non-printable characters 097 if (c >= 32) s.append(c); 098 } 099 pos++; 100 } 101 102 String res = s.toString(); 103 s.delete(0, s.length()); 104 return res.trim(); 105 } 106 107 private void skipSign() { 108 char c; 109 boolean signFound = false; 110 while (pos < n) { 111 c = data.charAt(pos); 112 if (c == '\t' || c == '\n' || c == ' ') { 113 pos++; 114 } else if (c == '=') { 115 if (signFound) break; // a = =qwerty means "a"="=qwerty" 116 signFound = true; 117 pos++; 118 } else { 119 break; 120 } 121 } 122 } 123 124 private void skipEmpty() { 125 char c; 126 while (pos < n) { 127 c = data.charAt(pos); 128 if (c == '\t' || c == '\n' || c == '\r' || c == ' ') { 129 pos++; 130 } else { 131 break; 132 } 133 } 134 } 135}