public class ScraperInputStream extends java.io.InputStream implements ScraperListener
Modifier and Type | Field and Description |
---|---|
private java.io.BufferedInputStream | bufferedIn |
private boolean | charsetChanged |
private java.lang.String | detectedCharset |
private boolean | endOfHead |
private int | mode |
private static int | MODE_PRESCAN |
private static int | MODE_PRESCAN_FINISHED |
private static long | preBufferSize |
private long | preRead |
private java.io.Reader | reader |
private java.io.Writer | writer |
Constructor and Description |
---|
ScraperInputStream(java.io.InputStream inStream, java.lang.String inputStreamCharset, VocabularyScraper vocabularyScraper, DigestURL rooturl, Transformer transformer, boolean passbyIfBinarySuspect, int maxLinks, int timezoneOffset) |
Modifier and Type | Method and Description |
---|---|
void | close() |
java.lang.String | detectCharset() |
private static java.lang.String | extractCharsetFromMimetypeHeader(java.lang.String mimeType) |
int | read() |
void | scrapeTag0(java.lang.String tagname, java.util.Properties tagopts) |
void | scrapeTag1(java.lang.String tagname, java.util.Properties tagopts, char[] text) |
private static final int MODE_PRESCAN
private static final int MODE_PRESCAN_FINISHED
private int mode
private static final long preBufferSize
private long preRead
private final java.io.BufferedInputStream bufferedIn
private java.lang.String detectedCharset
private boolean charsetChanged
private boolean endOfHead
private java.io.Reader reader
private java.io.Writer writer
public ScraperInputStream(java.io.InputStream inStream, java.lang.String inputStreamCharset, VocabularyScraper vocabularyScraper, DigestURL rooturl, Transformer transformer, boolean passbyIfBinarySuspect, int maxLinks, int timezoneOffset)
private static java.lang.String extractCharsetFromMimetypeHeader(java.lang.String mimeType)
public void scrapeTag0(java.lang.String tagname, java.util.Properties tagopts)
Specified by: scrapeTag0 in interface ScraperListener
public void scrapeTag1(java.lang.String tagname, java.util.Properties tagopts, char[] text)
Specified by: scrapeTag1 in interface ScraperListener
public java.lang.String detectCharset() throws java.io.IOException
Throws: java.io.IOException
public int read() throws java.io.IOException
Specified by: read in class java.io.InputStream
Throws: java.io.IOException
public void close() throws java.io.IOException
Specified by: close in interface java.io.Closeable
Specified by: close in interface java.lang.AutoCloseable
Overrides: close in class java.io.InputStream
Throws: java.io.IOException