public class RecrawlBusyThread extends AbstractBusyThread
Modifier and Type | Field and Description |
---|---|
private int |
chunksize |
private int |
chunkstart |
private java.lang.String |
currentQuery |
private boolean |
includefailed |
(package private) Switchboard |
sb |
private java.lang.String |
solrSortBy |
static java.lang.String |
THREAD_NAME |
long |
urlsfound |
private java.util.Set<DigestURL> |
urlstack |
busytime, memuse, running
Constructor and Description |
---|
RecrawlBusyThread(Switchboard xsb) |
Modifier and Type | Method and Description |
---|---|
private boolean |
feedToCrawler()
feed urls to the local crawler
(Switchboard.addToCrawler() is not used here, as existing urls are always skipped there)
|
void |
freemem()
is called when an outOfMemoryCycle is performed
this method should try to free some memory, so that the job can be executed
|
boolean |
getIncludeFailed() |
int |
getJobCount() |
java.lang.String |
getQuery() |
boolean |
job()
Process query and hand over urls to the crawler
|
private boolean |
processSingleQuery()
Selects the documents whose urls are to be recrawled
|
void |
setIncludeFailed(boolean includefailedurls)
Flag to include failed urls (httpstatus_i <> 200)
if true -> currentQuery is used as is,
if false -> the term " AND (httpstatus_i:200)" is appended to currentQuery
|
void |
setQuery(java.lang.String q,
boolean includefailedurls)
Set the query to select documents to recrawl
and resets the counter to start a fresh query loop
|
getBusyCycles, getBusySleep, getHighCPUCycles, getIdleCycles, getIdleSleep, getOutOfMemoryCycles, getSleepTime, intermission, notifyThread, run, setBusySleep, setIdleSleep, setLoadPreReqisite, setMemPreReqisite, setObeyIntermission, setStartupSleep
announceThreadBlockApply, announceThreadBlockRelease, close, getBlockTime, getExecTime, getLongDescription, getMemoryUse, getMonitorURL, getShortDescription, jobExceptionHandler, open, setDescription, shutdownInProgress, terminate
activeCount, checkAccess, clone, countStackFrames, currentThread, destroy, dumpStack, enumerate, getAllStackTraces, getContextClassLoader, getDefaultUncaughtExceptionHandler, getId, getName, getPriority, getStackTrace, getState, getThreadGroup, getUncaughtExceptionHandler, holdsLock, interrupt, interrupted, isAlive, isDaemon, isInterrupted, join, join, join, resume, setContextClassLoader, setDaemon, setDefaultUncaughtExceptionHandler, setName, setPriority, setUncaughtExceptionHandler, sleep, sleep, start, stop, stop, suspend, toString, yield
equals, finalize, getClass, hashCode, notify, notifyAll, wait, wait, wait
close, getBlockTime, getExecTime, getLongDescription, getMemoryUse, getMonitorURL, getShortDescription, isAlive, jobExceptionHandler, open, setDescription, shutdownInProgress, start, terminate
public static final java.lang.String THREAD_NAME
private java.lang.String currentQuery
private boolean includefailed
private int chunkstart
private final int chunksize
final Switchboard sb
private final java.util.Set<DigestURL> urlstack
public long urlsfound
private java.lang.String solrSortBy
public RecrawlBusyThread(Switchboard xsb)
public void setQuery(java.lang.String q, boolean includefailedurls)
q
- select query
includefailedurls
- true=all http status docs are recrawled, false=httpstatus=200 docs are recrawled
public java.lang.String getQuery()
public void setIncludeFailed(boolean includefailedurls)
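includefailedurls
- true=currentQuery is used as is, false=the term " AND (httpstatus_i:200)" is appended to currentQuery
public boolean getIncludeFailed()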
private boolean feedToCrawler()
public boolean job()
private boolean processSingleQuery()
public int getJobCount()
public void freemem()
BusyThread