public class SurrogateReader
extends org.xml.sax.helpers.DefaultHandler
implements java.lang.Runnable
Modifier and Type | Field and Description |
---|---|
private java.lang.StringBuilder | buffer |
private java.nio.charset.Charset | charset |
private int | concurrency |
private CollectionConfiguration | configuration |
private CrawlStacker | crawlStacker |
private DCEntry | dcEntry |
private java.lang.String | elementName |
private org.xml.sax.InputSource | inputSource |
private java.io.PushbackInputStream | inputStream |
private boolean | parsingValue |
static SolrInputDocument | POISON_DOCUMENT |
private javax.xml.parsers.SAXParser | saxParser |
private java.util.concurrent.BlockingQueue<java.lang.Object> | surrogates<br>Surrogates are either SolrInputDocument or DCEntry instances |
static java.lang.String | SURROGATES_MAIN_ELEMENT_CLOSE |
static java.lang.String | SURROGATES_MAIN_ELEMENT_NAME |
static java.lang.String | SURROGATES_MAIN_ELEMENT_OPEN |
private static java.lang.ThreadLocal<javax.xml.parsers.SAXParser> | tlSax |
Constructor and Description |
---|
SurrogateReader(java.io.InputStream stream, int queueSize, CrawlStacker crawlStacker, CollectionConfiguration configuration, int concurrency) |
SurrogateReader(java.io.PushbackInputStream stream, int queueSize, CrawlStacker crawlStacker, CollectionConfiguration configuration, int concurrency) |
Modifier and Type | Method and Description |
---|---|
void | characters(char[] ch, int start, int length) |
void | endElement(java.lang.String uri, java.lang.String name, java.lang.String tag) |
private static javax.xml.parsers.SAXParser | getParser() |
private boolean | isSolrDump() |
void | run() |
void | startElement(java.lang.String uri, java.lang.String name, java.lang.String tag, org.xml.sax.Attributes atts) |
java.lang.Object | take() |
public static final java.lang.String SURROGATES_MAIN_ELEMENT_NAME
public static final java.lang.String SURROGATES_MAIN_ELEMENT_OPEN
public static final java.lang.String SURROGATES_MAIN_ELEMENT_CLOSE
public static final SolrInputDocument POISON_DOCUMENT
private final java.lang.StringBuilder buffer
private boolean parsingValue
private DCEntry dcEntry
private java.lang.String elementName
private final java.util.concurrent.BlockingQueue<java.lang.Object> surrogates
private javax.xml.parsers.SAXParser saxParser
private final org.xml.sax.InputSource inputSource
private final java.io.PushbackInputStream inputStream
private final CrawlStacker crawlStacker
private final CollectionConfiguration configuration
private final int concurrency
private java.nio.charset.Charset charset
private static final java.lang.ThreadLocal<javax.xml.parsers.SAXParser> tlSax
public SurrogateReader(java.io.InputStream stream, int queueSize, CrawlStacker crawlStacker, CollectionConfiguration configuration, int concurrency) throws java.io.IOException
Throws: java.io.IOException
public SurrogateReader(java.io.PushbackInputStream stream, int queueSize, CrawlStacker crawlStacker, CollectionConfiguration configuration, int concurrency) throws java.io.IOException
Throws: java.io.IOException
private static javax.xml.parsers.SAXParser getParser() throws org.xml.sax.SAXException
Throws: org.xml.sax.SAXException
public void run()
Specified by: run in interface java.lang.Runnable
private boolean isSolrDump()
public void startElement(java.lang.String uri, java.lang.String name, java.lang.String tag, org.xml.sax.Attributes atts) throws org.xml.sax.SAXException
Specified by: startElement in interface org.xml.sax.ContentHandler
Overrides: startElement in class org.xml.sax.helpers.DefaultHandler
Throws: org.xml.sax.SAXException
public void endElement(java.lang.String uri, java.lang.String name, java.lang.String tag)
Specified by: endElement in interface org.xml.sax.ContentHandler
Overrides: endElement in class org.xml.sax.helpers.DefaultHandler
public void characters(char[] ch, int start, int length)
Specified by: characters in interface org.xml.sax.ContentHandler
Overrides: characters in class org.xml.sax.helpers.DefaultHandler
public java.lang.Object take()