public class NoticedURL
extends java.lang.Object
Modifier and Type | Class and Description |
---|---|
static class |
NoticedURL.StackType |
Modifier and Type | Field and Description |
---|---|
private java.io.File |
cachePath |
private Balancer |
coreStack |
private Balancer |
limitStack |
private Balancer |
noloadStack |
private Balancer |
remoteStack |
Modifier | Constructor and Description |
---|---|
protected |
NoticedURL(java.io.File cachePath,
int onDemandLimit,
boolean exceed134217727) |
Modifier and Type | Method and Description |
---|---|
void |
clear() |
void |
clear(NoticedURL.StackType stackType) |
protected void |
close() |
protected boolean |
existsInStack(byte[] urlhashb) |
protected void |
finalize() |
protected Request |
get(byte[] urlhash) |
java.util.Map<java.lang.String,java.lang.Integer[]> |
getDomainStackHosts(NoticedURL.StackType stackType,
RobotsTxt robots)
Get a list of the domains that are currently maintained as domain stacks.
|
java.util.List<Request> |
getDomainStackReferences(NoticedURL.StackType stackType,
java.lang.String host,
int maxcount,
long maxtime)
Get the list of crawl request entries for a specific host.
|
protected void |
initRemoteStack()
Initialize the remote crawl stack; called internally on the first push to remoteStack.
|
boolean |
isEmpty() |
boolean |
isEmpty(NoticedURL.StackType stackType) |
boolean |
isEmptyLocal() |
java.util.Iterator<Request> |
iterator(NoticedURL.StackType stackType) |
private static Request |
pop(Balancer balancer,
boolean delay,
CrawlSwitchboard cs,
RobotsTxt robots) |
Request |
pop(NoticedURL.StackType stackType,
boolean delay,
CrawlSwitchboard cs,
RobotsTxt robots) |
java.lang.String |
push(NoticedURL.StackType stackType,
Request entry,
CrawlProfile profile,
RobotsTxt robots)
Push a crawl request onto one of the different crawl stacks.
|
int |
removeByHostHash(java.util.Set<java.lang.String> hosthashes) |
int |
removeByProfileHandle(java.lang.String handle,
long timeout) |
boolean |
removeByURLHash(byte[] urlhashBytes)
Remove a CrawlEntry identified by the given hash.
|
protected void |
shift(NoticedURL.StackType fromStack,
NoticedURL.StackType toStack,
CrawlSwitchboard cs,
RobotsTxt robots) |
int |
size() |
int |
stackSize(NoticedURL.StackType stackType) |
private Balancer coreStack
private Balancer limitStack
private Balancer remoteStack
private Balancer noloadStack
private final java.io.File cachePath
protected NoticedURL(java.io.File cachePath, int onDemandLimit, boolean exceed134217727)
protected void initRemoteStack()
public void clear()
protected void close()
protected void finalize() throws java.lang.Throwable
Overrides: finalize in class java.lang.Object
Throws: java.lang.Throwable
public int size()
public boolean isEmptyLocal()
public boolean isEmpty()
public boolean isEmpty(NoticedURL.StackType stackType)
public int stackSize(NoticedURL.StackType stackType)
protected boolean existsInStack(byte[] urlhashb)
public java.lang.String push(NoticedURL.StackType stackType, Request entry, CrawlProfile profile, RobotsTxt robots)
Parameters: stackType, entry
protected Request get(byte[] urlhash)
public boolean removeByURLHash(byte[] urlhashBytes)
Parameters: urlhash
public int removeByProfileHandle(java.lang.String handle, long timeout) throws SpaceExceededException
Throws: SpaceExceededException
public int removeByHostHash(java.util.Set<java.lang.String> hosthashes)
public java.util.Map<java.lang.String,java.lang.Integer[]> getDomainStackHosts(NoticedURL.StackType stackType, RobotsTxt robots)
public java.util.List<Request> getDomainStackReferences(NoticedURL.StackType stackType, java.lang.String host, int maxcount, long maxtime)
Parameters: host, maxcount
public Request pop(NoticedURL.StackType stackType, boolean delay, CrawlSwitchboard cs, RobotsTxt robots) throws java.io.IOException
Throws: java.io.IOException
protected void shift(NoticedURL.StackType fromStack, NoticedURL.StackType toStack, CrawlSwitchboard cs, RobotsTxt robots)
public void clear(NoticedURL.StackType stackType)
private static Request pop(Balancer balancer, boolean delay, CrawlSwitchboard cs, RobotsTxt robots) throws java.io.IOException
Throws: java.io.IOException
public java.util.Iterator<Request> iterator(NoticedURL.StackType stackType)