public final class RobotsTxtParser
extends java.lang.Object
Modifier and Type | Field and Description |
---|---|
private java.lang.String |
agentName |
private java.util.ArrayList<java.lang.String> |
allowList |
private long |
crawlDelayMillis |
private java.util.ArrayList<java.lang.String> |
denyList |
private java.lang.String[] |
myNames |
private static java.util.regex.Pattern |
patternTab |
private static java.lang.String |
ROBOTS_ALLOW |
private static java.lang.String |
ROBOTS_COMMENT |
private static java.lang.String |
ROBOTS_CRAWL_DELAY |
private static java.lang.String |
ROBOTS_DISALLOW |
private static java.lang.String |
ROBOTS_SITEMAP |
private static java.lang.String |
ROBOTS_USER_AGENT |
private java.util.ArrayList<java.lang.String> |
sitemaps |
Modifier | Constructor and Description |
---|---|
protected |
RobotsTxtParser(java.lang.String[] myNames) |
protected |
RobotsTxtParser(java.lang.String[] myNames,
byte[] robotsTxt) |
Modifier and Type | Method and Description |
---|---|
protected java.lang.String |
agentName()
The user agent that was applied to obtain the crawl properties is recorded,
because it is possible that this robots.txt parser applies to several
different user agents.
|
protected java.util.ArrayList<java.lang.String> |
allowList() |
protected long |
crawlDelayMillis()
A crawl delay can be assigned to a specific agent or to all agents.
A special case is when the user agent of this YaCy peer is given explicitly
using the peer name: if the crawl delay is then given as '0', the crawler
does not apply any forced anti-DoS crawl pause.
|
protected java.util.ArrayList<java.lang.String> |
denyList() |
private void |
parse(java.io.BufferedReader reader) |
protected java.util.ArrayList<java.lang.String> |
sitemap() |
private static final java.util.regex.Pattern patternTab
private static final java.lang.String ROBOTS_USER_AGENT
private static final java.lang.String ROBOTS_DISALLOW
private static final java.lang.String ROBOTS_ALLOW
private static final java.lang.String ROBOTS_COMMENT
private static final java.lang.String ROBOTS_SITEMAP
private static final java.lang.String ROBOTS_CRAWL_DELAY
private final java.util.ArrayList<java.lang.String> allowList
private final java.util.ArrayList<java.lang.String> denyList
private java.util.ArrayList<java.lang.String> sitemaps
private long crawlDelayMillis
private final java.lang.String[] myNames
private java.lang.String agentName
protected RobotsTxtParser(java.lang.String[] myNames)
protected RobotsTxtParser(java.lang.String[] myNames, byte[] robotsTxt)
private void parse(java.io.BufferedReader reader)
protected long crawlDelayMillis()
protected java.lang.String agentName()
protected java.util.ArrayList<java.lang.String> sitemap()
protected java.util.ArrayList<java.lang.String> allowList()
protected java.util.ArrayList<java.lang.String> denyList()