net.sf.farrago.catalog
Class FarragoReposUtil.InvalidXmlCharFilterInputStream

java.lang.Object
  extended by java.io.InputStream
      extended by net.sf.farrago.catalog.FarragoReposUtil.InvalidXmlCharFilterInputStream
All Implemented Interfaces:
Closeable
Enclosing class:
FarragoReposUtil

public static class FarragoReposUtil.InvalidXmlCharFilterInputStream
extends InputStream


Nested Class Summary
private static class FarragoReposUtil.InvalidXmlCharFilterInputStream.ByteOutputStream
          ByteOutputStream extends ByteArrayOutputStream to provide ByteBuffer-like operations such as compact, array and get.
 
Field Summary
private static Map<byte[],String> ALL_DECLS
           
private  InputStream in
           
private  char[] inputBuffer
           
private static int MAX_DECL_SIZE
           
private  int numInvalidCharsFiltered
           
private static byte[] OTHER_ASCII_LIKE
           
private  FarragoReposUtil.InvalidXmlCharFilterInputStream.ByteOutputStream outputBuffer
           
private  Writer outputBufferWriter
           
private  Reader reader
           
private static byte[] UTF16_BE_BOM
           
private static byte[] UTF16_BE_SANS_BOM
           
private static byte[] UTF16_LE_BOM
           
private static byte[] UTF16_LE_SANS_BOM
           
private static byte[] UTF8_BOM
           
 
Constructor Summary
FarragoReposUtil.InvalidXmlCharFilterInputStream(InputStream in)
           
 
Method Summary
 int available()
           
private  boolean check(int ch)
           
 void close()
           
private static Charset getCharsetFromXmlDecl(Reader reader)
          Parses the XML declaration at the start of the Reader's input and returns the specified encoding, if any.
 int getNumInvalidCharsFiltered()
           
private static Charset guessCharset(InputStream in, FarragoReposUtil.InvalidXmlCharFilterInputStream.ByteOutputStream bufferStream)
          Guesses the character set used by the input stream, storing characters in the buffer stream.
 boolean markSupported()
           
private static boolean matches(byte[] data, byte[] expected)
          Compares two byte arrays.
 int read()
           
 int read(byte[] b, int off, int len)
           
 long skip(long n)
           
private static byte[] toBytes(int[] data)
          Converts an int array to a byte array.
 
Methods inherited from class java.io.InputStream
mark, read, reset
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

UTF16_BE_BOM

private static final byte[] UTF16_BE_BOM

UTF16_BE_SANS_BOM

private static final byte[] UTF16_BE_SANS_BOM

UTF16_LE_BOM

private static final byte[] UTF16_LE_BOM

UTF16_LE_SANS_BOM

private static final byte[] UTF16_LE_SANS_BOM

UTF8_BOM

private static final byte[] UTF8_BOM

OTHER_ASCII_LIKE

private static final byte[] OTHER_ASCII_LIKE

ALL_DECLS

private static final Map<byte[],String> ALL_DECLS

MAX_DECL_SIZE

private static final int MAX_DECL_SIZE
See Also:
Constant Field Values

in

private final InputStream in

numInvalidCharsFiltered

private int numInvalidCharsFiltered

outputBuffer

private FarragoReposUtil.InvalidXmlCharFilterInputStream.ByteOutputStream outputBuffer

outputBufferWriter

private Writer outputBufferWriter

reader

private Reader reader

inputBuffer

private char[] inputBuffer
Constructor Detail

FarragoReposUtil.InvalidXmlCharFilterInputStream

public FarragoReposUtil.InvalidXmlCharFilterInputStream(InputStream in)
                                                 throws IOException
Throws:
IOException
Method Detail

toBytes

private static byte[] toBytes(int[] data)
Converts an int array to a byte array. Assumes all int values in the array contain only 8 bits of data.

Parameters:
data - int array
Returns:
byte array

matches

private static boolean matches(byte[] data,
                               byte[] expected)
Compares two byte arrays. If the data array does not begin with exactly the bytes specified in the expected array, returns false. The data array may be longer than the expected array, but not shorter.

Parameters:
data - data to test
expected - expected value
Returns:
true if data and expected match (see above)

guessCharset

private static Charset guessCharset(InputStream in,
                                    FarragoReposUtil.InvalidXmlCharFilterInputStream.ByteOutputStream bufferStream)
                             throws IOException
Guesses the character set used by the input stream, storing characters in the buffer stream. The first block of data in the input stream is compared against the XML declarations in ALL_DECLS to find a character set suitable for reading at least the XML declaration from the input stream.

Parameters:
in - input stream
bufferStream - buffer stream for temporary storage
Returns:
best-guess character set for the input stream
Throws:
IOException - on I/O error, or if the character set cannot be detected or instantiated

getCharsetFromXmlDecl

private static Charset getCharsetFromXmlDecl(Reader reader)
                                      throws IOException
Parses the XML declaration at the start of the Reader's input and returns the specified encoding, if any. The given Reader must be configured with a character set encoding capable of reading the XML declaration (which will only contain a limited set of characters).

Parameters:
reader - a Reader configured with a suitable character set encoding
Returns:
the character set specified by the XML declaration, or null if not found
Throws:
IOException - on I/O error or if the named character set cannot be instantiated
IndexOutOfBoundsException - if the XML declaration is malformed

markSupported

public boolean markSupported()
Overrides:
markSupported in class InputStream

available

public int available()
              throws IOException
Overrides:
available in class InputStream
Throws:
IOException

close

public void close()
           throws IOException
Specified by:
close in interface Closeable
Overrides:
close in class InputStream
Throws:
IOException

read

public int read()
         throws IOException
Specified by:
read in class InputStream
Throws:
IOException

check

private boolean check(int ch)

read

public int read(byte[] b,
                int off,
                int len)
         throws IOException
Overrides:
read in class InputStream
Throws:
IOException

skip

public long skip(long n)
          throws IOException
Overrides:
skip in class InputStream
Throws:
IOException

getNumInvalidCharsFiltered

public int getNumInvalidCharsFiltered()