Modifier and Type | Field and Description |
---|---|
private java.util.TreeMap<java.lang.Integer,Index> |
depthStacks |
private static int |
EcoFSBufferSize |
private boolean |
exceed134217727 |
private java.lang.String |
hostHash |
private java.lang.String |
hostName |
private java.io.File |
hostPath |
static java.lang.String |
indexSuffix |
private static ConcurrentLog |
log |
private static int |
objectIndexBufferSize |
private boolean |
onDemand |
private int |
port |
Constructor and Description |
---|
HostQueue(java.io.File hostPath,
boolean onDemand,
boolean exceed134217727) |
HostQueue(java.io.File hostsPath,
java.lang.String hostName,
int port,
boolean onDemand,
boolean exceed134217727) |
Modifier and Type | Method and Description |
---|---|
void |
clear()
delete all urls from the stack
|
void |
close()
close the balancer object
|
Request |
get(byte[] urlhash)
get one url from the crawl stack
|
java.util.Map<java.lang.String,java.lang.Integer[]> |
getDomainStackHosts(RobotsTxt robots)
get a list of domains that are currently maintained as domain stacks
|
java.util.List<Request> |
getDomainStackReferences(java.lang.String host,
int maxcount,
long maxtime)
get lists of crawl request entries for a specific host
|
boolean |
getExceed134217727() |
private java.io.File |
getFile(int depth) |
java.lang.String |
getHost() |
private Index |
getLowestStack() |
int |
getLowestStackDepth() |
int |
getOnDemandLimit() |
int |
getPort() |
private Index |
getStack(int depth) |
boolean |
has(byte[] urlhashb)
check if given url hash is contained in the balancer stack
|
private void |
init()
Opens and initializes the host queue
|
boolean |
isEmpty()
check if stack is empty
|
java.util.Iterator<Request> |
iterator()
iterate through all requests in the queue
|
private int |
openAllStacks() |
private Index |
openStack(java.io.File f) |
Request |
pop(boolean delay,
CrawlSwitchboard cs,
RobotsTxt robots)
get the next entry in this crawl queue in such a way that the domain access time delta is maximized
and always above the given minimum delay time.
|
java.lang.String |
push(Request entry,
CrawlProfile profile,
RobotsTxt robots)
push a crawl request on the balancer stack
|
int |
remove(HandleSet urlHashes)
remove urls from the queue
|
int |
removeAllByHostHashes(java.util.Set<java.lang.String> hosthashes)
delete all urls which are stored for given host hashes
|
int |
removeAllByProfileHandle(java.lang.String profileHandle,
long timeout)
delete all urls from the stack by given profile handle
|
int |
size()
get the size of the stack
|
private static final ConcurrentLog log
public static final java.lang.String indexSuffix
private static final int EcoFSBufferSize
private static final int objectIndexBufferSize
private final java.io.File hostPath
private final java.lang.String hostName
private java.lang.String hostHash
private final int port
private final boolean exceed134217727
private final boolean onDemand
private java.util.TreeMap<java.lang.Integer,Index> depthStacks
public HostQueue(java.io.File hostsPath, java.lang.String hostName, int port, boolean onDemand, boolean exceed134217727) throws java.net.MalformedURLException
java.net.MalformedURLException
public HostQueue(java.io.File hostPath, boolean onDemand, boolean exceed134217727) throws java.net.MalformedURLException
java.net.MalformedURLException
private final void init() throws java.net.MalformedURLException
java.net.MalformedURLException - if directory for the host could not be created
public java.lang.String getHost()
public int getPort()
private int openAllStacks()
public int getLowestStackDepth()
private Index getLowestStack()
private Index getStack(int depth)
private java.io.File getFile(int depth)
private Index openStack(java.io.File f)
public void close()
Balancer
public void clear()
Balancer
public Request get(byte[] urlhash) throws java.io.IOException
Balancer
public int removeAllByProfileHandle(java.lang.String profileHandle, long timeout) throws java.io.IOException, SpaceExceededException
Balancer
removeAllByProfileHandle
in interface Balancer
java.io.IOException
SpaceExceededException
public int removeAllByHostHashes(java.util.Set<java.lang.String> hosthashes)
removeAllByHostHashes
in interface Balancer
hosthashes -
public int remove(HandleSet urlHashes) throws java.io.IOException
public boolean has(byte[] urlhashb)
Balancer
public int size()
Balancer
public boolean isEmpty()
Balancer
public java.lang.String push(Request entry, CrawlProfile profile, RobotsTxt robots) throws java.io.IOException, SpaceExceededException
Balancer
push
in interface Balancer
java.io.IOException
SpaceExceededException
public Request pop(boolean delay, CrawlSwitchboard cs, RobotsTxt robots) throws java.io.IOException
Balancer
public java.util.Iterator<Request> iterator() throws java.io.IOException
Balancer
public java.util.Map<java.lang.String,java.lang.Integer[]> getDomainStackHosts(RobotsTxt robots)
getDomainStackHosts
in interface Balancer
public java.util.List<Request> getDomainStackReferences(java.lang.String host, int maxcount, long maxtime)
getDomainStackReferences
in interface Balancer
host -
maxcount -
maxtime -
public int getOnDemandLimit()
getOnDemandLimit
in interface Balancer
public boolean getExceed134217727()
getExceed134217727
in interface Balancer