public class CrawlQueues
extends java.lang.Object
Modifier and Type | Class and Description |
---|---|
private class |
CrawlQueues.Loader |
Modifier and Type | Field and Description |
---|---|
java.util.Map<java.lang.String,DigestURL> |
delegatedURL |
ErrorCache |
errorURL |
private static ConcurrentLog |
log |
NoticedURL |
noticeURL |
private static Request |
POISON_REQUEST |
private java.util.ArrayList<java.lang.String> |
remoteCrawlProviderHashes |
private Switchboard |
sb |
private CrawlQueues.Loader[] |
worker |
private java.util.concurrent.ArrayBlockingQueue<Request> |
workerQueue |
Constructor and Description |
---|
CrawlQueues(Switchboard sb,
java.io.File queuePath) |
Modifier and Type | Method and Description |
---|---|
java.util.Map<DigestURL,Request> |
activeWorkerEntries() |
boolean |
autocrawlJob() |
void |
clear() |
void |
close() |
boolean |
coreCrawlJob() |
int |
coreCrawlJobSize() |
private void |
ensureLoaderRunning() |
HarvestProcess |
exists(byte[] hash)
tests if hash occurs in any database
|
void |
freemem() |
DigestURL |
getURL(byte[] urlhash) |
int |
hostcount(java.lang.String host)
Counts how many worker entries share the given host name.
|
void |
initRemoteCrawlQueues() |
private boolean |
isPaused(java.lang.String crawljob)
If crawling was paused, waits until notified to continue;
blocks until the pause is ended.
|
int |
limitCrawlJobSize() |
private void |
load(Request urlEntry,
java.lang.String stats)
Make some checks if crawl is valid and start it
|
private java.lang.String |
loadIsPossible(NoticedURL.StackType stackType)
Checks if crawl queue has elements and new crawl will not exceed thread-limit
|
int |
noloadCrawlJobSize() |
void |
relocate(java.io.File newQueuePath)
Relocation is necessary if the user switches the network.
|
boolean |
remoteCrawlLoaderJob() |
boolean |
remoteTriggeredCrawlJob() |
int |
remoteTriggeredCrawlJobSize() |
int |
removeHosts(java.util.Set<java.lang.String> hosthashes) |
void |
removeURL(byte[] hash) |
private static java.lang.String |
urlToString(DigestURL url) |
private static final Request POISON_REQUEST
private static final ConcurrentLog log
private final Switchboard sb
private final CrawlQueues.Loader[] worker
private final java.util.concurrent.ArrayBlockingQueue<Request> workerQueue
private java.util.ArrayList<java.lang.String> remoteCrawlProviderHashes
public NoticedURL noticeURL
public ErrorCache errorURL
public java.util.Map<java.lang.String,DigestURL> delegatedURL
public CrawlQueues(Switchboard sb, java.io.File queuePath)
public void initRemoteCrawlQueues()
public void relocate(java.io.File newQueuePath)
newQueuePath
- public void close()
public void clear()
public HarvestProcess exists(byte[] hash)
hash
- public int hostcount(java.lang.String host)
host
- public void removeURL(byte[] hash)
public int removeHosts(java.util.Set<java.lang.String> hosthashes)
public DigestURL getURL(byte[] urlhash)
public void freemem()
public int coreCrawlJobSize()
public boolean coreCrawlJob()
private void load(Request urlEntry, java.lang.String stats)
urlEntry
- profileHandle
- stats
- String for log prefixing
private boolean isPaused(java.lang.String crawljob)
crawljob
- private java.lang.String loadIsPossible(NoticedURL.StackType stackType)
stackType
- public boolean remoteCrawlLoaderJob()
public boolean autocrawlJob()
private static java.lang.String urlToString(DigestURL url)
url
- public int limitCrawlJobSize()
public int noloadCrawlJobSize()
public int remoteTriggeredCrawlJobSize()
public boolean remoteTriggeredCrawlJob()
private void ensureLoaderRunning()