Modifier and Type | Class and Description |
---|---|
private class |
LegacyBalancer.EntryIterator |
private static class |
LegacyBalancer.HostHandles |
Modifier and Type | Field and Description |
---|---|
private java.io.File |
cacheStacksPath |
private java.util.concurrent.ConcurrentMap<java.lang.String,LegacyBalancer.HostHandles> |
domainStacks |
private int |
domStackInitSize |
private HandleSet |
double_push_check |
private static int |
EcoFSBufferSize |
private static java.lang.String |
indexSuffix |
private long |
lastDomainStackFill |
private static int |
MAX_DOUBLE_PUSH_CHECK |
private static int |
objectIndexBufferSize |
private java.util.Random |
random |
private BufferedObjectIndex |
urlFileIndex |
private java.util.List<java.util.Map.Entry<java.lang.String,byte[]>> |
zeroWaitingCandidates |
Constructor and Description |
---|
LegacyBalancer(java.io.File cachePath,
java.lang.String stackname,
boolean useTailCache,
boolean exceed134217727) |
Modifier and Type | Method and Description |
---|---|
void |
clear()
delete all urls from the stack
|
void |
close()
close the balancer object
|
private void |
fillDomainStacks() |
Request |
get(byte[] urlhash)
get one url from the crawl stack
|
private byte[] |
getbest(RobotsTxt robots,
CrawlSwitchboard cs) |
java.util.Map<java.lang.String,java.lang.Integer[]> |
getDomainStackHosts(RobotsTxt robots)
get a list of domains that are currently maintained as domain stacks
|
java.util.List<Request> |
getDomainStackReferences(java.lang.String host,
int maxcount,
long maxtime)
get lists of crawl request entries for a specific host
|
boolean |
getExceed134217727() |
int |
getOnDemandLimit() |
boolean |
has(byte[] urlhashb)
check if given url hash is contained in the balancer stack
|
boolean |
isEmpty()
check if stack is empty
|
java.util.Iterator<Request> |
iterator()
iterate through all requests in the queue
|
private byte[] |
pickFromZeroWaiting() |
Request |
pop(boolean delay,
CrawlSwitchboard cs,
RobotsTxt robots)
get the next entry in this crawl queue in such a way that the domain access time delta is maximized
and always above the given minimum delay time.
|
java.lang.String |
push(Request entry,
CrawlProfile profile,
RobotsTxt robots)
push a crawl request on the balancer stack
|
private void |
pushHashToDomainStacks(java.lang.String host,
java.lang.String hosthash,
byte[] urlhash) |
int |
remove(HandleSet urlHashes)
This method exists only because many import/export methods need it
and it was implemented in the previous architecture;
however, its usage is not recommended.
|
int |
removeAllByHostHashes(java.util.Set<java.lang.String> hosthashes)
delete all urls which are stored for given host hashes
|
int |
removeAllByProfileHandle(java.lang.String profileHandle,
long timeout)
delete all urls from the stack by given profile handle
|
private void |
removeHashFromDomainStacks(java.lang.String host,
byte[] urlhash) |
int |
size()
get the size of the stack
|
private static final java.lang.String indexSuffix
private static final int EcoFSBufferSize
private static final int objectIndexBufferSize
private static final int MAX_DOUBLE_PUSH_CHECK
private final java.io.File cacheStacksPath
private BufferedObjectIndex urlFileIndex
private final java.util.concurrent.ConcurrentMap<java.lang.String,LegacyBalancer.HostHandles> domainStacks
private final HandleSet double_push_check
private long lastDomainStackFill
private int domStackInitSize
private final java.util.List<java.util.Map.Entry<java.lang.String,byte[]>> zeroWaitingCandidates
private final java.util.Random random
public LegacyBalancer(java.io.File cachePath, java.lang.String stackname, boolean useTailCache, boolean exceed134217727)
public int getOnDemandLimit()
getOnDemandLimit
in interface Balancer
public boolean getExceed134217727()
getExceed134217727
in interface Balancer
public void close()
Balancer
public void clear()
Balancer
public Request get(byte[] urlhash) throws java.io.IOException
Balancer
public int removeAllByProfileHandle(java.lang.String profileHandle, long timeout) throws java.io.IOException, SpaceExceededException
Balancer
removeAllByProfileHandle
in interface Balancer
java.io.IOException
SpaceExceededException
public int remove(HandleSet urlHashes) throws java.io.IOException
public boolean has(byte[] urlhashb)
Balancer
public int size()
Balancer
public boolean isEmpty()
Balancer
public java.lang.String push(Request entry, CrawlProfile profile, RobotsTxt robots) throws java.io.IOException, SpaceExceededException
push
in interface Balancer
entry -
java.io.IOException
SpaceExceededException
public java.util.Map<java.lang.String,java.lang.Integer[]> getDomainStackHosts(RobotsTxt robots)
getDomainStackHosts
in interface Balancer
public java.util.List<Request> getDomainStackReferences(java.lang.String host, int maxcount, long maxtime)
getDomainStackReferences
in interface Balancer
host -
maxcount -
maxtime -
private void pushHashToDomainStacks(java.lang.String host, java.lang.String hosthash, byte[] urlhash) throws SpaceExceededException
SpaceExceededException
private void removeHashFromDomainStacks(java.lang.String host, byte[] urlhash)
public Request pop(boolean delay, CrawlSwitchboard cs, RobotsTxt robots) throws java.io.IOException
pop
in interface Balancer
delay - true if the requester demands forced delays using explicit thread sleep
profile -
java.io.IOException
SpaceExceededException
private byte[] getbest(RobotsTxt robots, CrawlSwitchboard cs)
private byte[] pickFromZeroWaiting()
private void fillDomainStacks() throws java.io.IOException
java.io.IOException
public java.util.Iterator<Request> iterator() throws java.io.IOException
Balancer
public int removeAllByHostHashes(java.util.Set<java.lang.String> hosthashes)
Balancer
removeAllByHostHashes
in interface Balancer