public interface Balancer
Modifier and Type | Method and Description |
---|---|
void |
clear()
delete all urls from the stack
|
void |
close()
close the balancer object
|
Request |
get(byte[] urlhash)
get one url from the crawl stack
|
java.util.Map<java.lang.String,java.lang.Integer[]> |
getDomainStackHosts(RobotsTxt robots)
get a list of domains that are currently maintained as domain stacks
|
java.util.List<Request> |
getDomainStackReferences(java.lang.String host,
int maxcount,
long maxtime)
get lists of crawl request entries for a specific host
|
boolean |
getExceed134217727() |
int |
getOnDemandLimit() |
boolean |
has(byte[] urlhashb)
check if given url hash is contained in the balancer stack
|
boolean |
isEmpty()
check if stack is empty
|
java.util.Iterator<Request> |
iterator()
iterate through all requests in the queue
|
Request |
pop(boolean delay,
CrawlSwitchboard cs,
RobotsTxt robots)
get the next entry in this crawl queue in such a way that the domain access time delta is maximized
and always above the given minimum delay time.
|
java.lang.String |
push(Request entry,
CrawlProfile profile,
RobotsTxt robots)
push a crawl request on the balancer stack
|
int |
remove(HandleSet urlHashes) |
int |
removeAllByHostHashes(java.util.Set<java.lang.String> hosthashes)
delete all urls which are stored for given host hashes
|
int |
removeAllByProfileHandle(java.lang.String profileHandle,
long timeout)
delete all urls from the stack by given profile handle
|
int |
size()
get the size of the stack
|
void close()
void clear()
Request get(byte[] urlhash) throws java.io.IOException
Parameters: urlhash
Throws: java.io.IOException
int removeAllByProfileHandle(java.lang.String profileHandle, long timeout) throws java.io.IOException, SpaceExceededException
Parameters: profileHandle, timeout
Throws: java.io.IOException, SpaceExceededException
int removeAllByHostHashes(java.util.Set<java.lang.String> hosthashes)
Parameters: hosthashes
int remove(HandleSet urlHashes) throws java.io.IOException
Parameters: urlHashes - a list of hashes that shall be removed
Throws: java.io.IOException
boolean has(byte[] urlhashb)
Parameters: urlhashb
int size()
int getOnDemandLimit()
boolean getExceed134217727()
boolean isEmpty()
java.lang.String push(Request entry, CrawlProfile profile, RobotsTxt robots) throws java.io.IOException, SpaceExceededException
Parameters: entry
Throws: java.io.IOException, SpaceExceededException
java.util.Map<java.lang.String,java.lang.Integer[]> getDomainStackHosts(RobotsTxt robots)
java.util.List<Request> getDomainStackReferences(java.lang.String host, int maxcount, long maxtime)
Parameters: host, maxcount, maxtime
Request pop(boolean delay, CrawlSwitchboard cs, RobotsTxt robots) throws java.io.IOException
Parameters:
delay - true if the requester demands forced delays using explicit thread sleep
profile - (no description given; not a parameter of the pop signature shown above)
Throws: java.io.IOException, SpaceExceededException
java.util.Iterator<Request> iterator() throws java.io.IOException
Throws: java.io.IOException