// License: GPL. For details, see LICENSE file.
package org.openstreetmap.josm.io;

import java.io.IOException;
import java.io.Reader;
import java.util.Arrays;

import org.openstreetmap.josm.tools.Logging;

/**
 * {@link Reader} wrapper that gets rid of characters that are invalid in an XML 1.0
 * document.
 *
 * Although these characters are forbidden, in the real world they still appear
 * in XML files. Java's SAX parser throws an exception, so we have to filter
 * at a lower level.
 *
 * Only handles control characters (&lt;0x20). Invalid characters are replaced
 * by space (0x20). Tab (0x9), LF (0xA) and CR (0xD) are left untouched.
 */
public class InvalidXmlCharacterFilter extends Reader {

    /** The wrapped reader that supplies the unfiltered characters. */
    private final Reader reader;

    /**
     * {@code true} until the first invalid character has been reported.
     * Static, hence shared by all instances; access is not synchronized.
     */
    private static boolean firstWarning = true;

    /**
     * Lookup table indexed by character code (0x00 to 0x1F);
     * {@code true} marks a character that must be replaced.
     */
    private static final boolean[] INVALID_CHARS;

    static {
        // Every control character is invalid, except the three whitespace characters below.
        INVALID_CHARS = new boolean[0x20];
        Arrays.fill(INVALID_CHARS, true);
        INVALID_CHARS[0x9] = false; // tab
        INVALID_CHARS[0xA] = false; // LF
        INVALID_CHARS[0xD] = false; // CR
    }

    /**
     * Constructs a new {@code InvalidXmlCharacterFilter} for the given Reader.
     * @param reader The reader to filter
     */
    public InvalidXmlCharacterFilter(Reader reader) {
        this.reader = reader;
    }

    /**
     * Reads characters from the wrapped reader into {@code b} and replaces, in place,
     * every invalid control character among the characters just read by a space.
     * @param b destination buffer
     * @param off offset in {@code b} at which to start storing characters
     * @param len maximum number of characters to read
     * @return the number of characters read, or -1 if the wrapped reader returned -1
     * @throws IOException if the wrapped reader throws it
     */
    @Override
    public int read(char[] b, int off, int len) throws IOException {
        int n = reader.read(b, off, len);
        if (n == -1) {
            return -1;
        }
        // Only the n characters that were actually delivered are filtered.
        for (int i = off; i < off + n; ++i) {
            b[i] = filter(b[i]);
        }
        return n;
    }

    /**
     * Closes the wrapped reader.
     * @throws IOException if the wrapped reader throws it
     */
    @Override
    public void close() throws IOException {
        reader.close();
    }

    /**
     * Maps an invalid control character to a space and returns any other character unchanged.
     * The first invalid character encountered is reported through {@link Logging#warn}.
     * @param in the character to check
     * @return 0x20 if {@code in} is an invalid control character, {@code in} otherwise
     */
    private static char filter(char in) {
        if (in < 0x20 && INVALID_CHARS[in]) {
            if (firstWarning) {
                // Log the character code, not the raw control character, to keep the log readable.
                Logging.warn("Invalid xml character encountered: 0x" + Integer.toHexString(in) + '.');
                firstWarning = false;
            }
            return 0x20;
        }
        return in;
    }
}