public final class HTTPLoader
extends java.lang.Object
Modifier and Type | Field and Description |
---|---|
static java.lang.String |
DEFAULT_ACCEPT |
private static java.lang.String |
DEFAULT_CHARSET |
static int |
DEFAULT_CRAWLING_RETRY_COUNT |
private static java.lang.String |
DEFAULT_ENCODING |
private static java.lang.String |
DEFAULT_LANGUAGE |
static int |
DEFAULT_MAXFILESIZE |
private ConcurrentLog |
log |
private Switchboard |
sb |
private int |
socketTimeout
The socket timeout that should be used
|
Constructor and Description |
---|
HTTPLoader(Switchboard sb,
ConcurrentLog theLog) |
Modifier and Type | Method and Description |
---|---|
private RequestHeader |
createRequestheader(Request request,
ClientIdentification.Agent agent)
Create request header for loading content.
|
private DigestURL |
extractRedirectURL(Request request,
CrawlProfile profile,
DigestURL url,
StatusLine statusline,
ResponseHeader responseHeader,
java.lang.String requestURLString)
Extract redirect URL from response header.
|
static Response |
load(Request request,
ClientIdentification.Agent agent) |
private static Response |
load(Request request,
ClientIdentification.Agent agent,
int retryCount) |
Response |
load(Request entry,
CrawlProfile profile,
int maxFileSize,
Blacklist.BlacklistType blacklistType,
ClientIdentification.Agent agent) |
private Response |
load(Request request,
CrawlProfile profile,
int retryCount,
int maxFileSize,
Blacklist.BlacklistType blacklistType,
ClientIdentification.Agent agent) |
java.io.InputStream |
openInputStream(Request request,
CrawlProfile profile,
int retryCount,
int maxFileSize,
Blacklist.BlacklistType blacklistType,
ClientIdentification.Agent agent)
Open input stream on a requested HTTP resource.
|
private static final java.lang.String DEFAULT_ENCODING
private static final java.lang.String DEFAULT_LANGUAGE
private static final java.lang.String DEFAULT_CHARSET
public static final java.lang.String DEFAULT_ACCEPT
public static final int DEFAULT_MAXFILESIZE
public static final int DEFAULT_CRAWLING_RETRY_COUNT
private final int socketTimeout
private final Switchboard sb
private final ConcurrentLog log
public HTTPLoader(Switchboard sb, ConcurrentLog theLog)
public Response load(Request entry, CrawlProfile profile, int maxFileSize, Blacklist.BlacklistType blacklistType, ClientIdentification.Agent agent) throws java.io.IOException
java.io.IOException
public java.io.InputStream openInputStream(Request request, CrawlProfile profile, int retryCount, int maxFileSize, Blacklist.BlacklistType blacklistType, ClientIdentification.Agent agent) throws java.io.IOException
request
profile
- crawl profile
retryCount
- remaining redirect retries count
maxFileSize
- max file size to load. -1 means no limit.
blacklistType
- blacklist type to use
agent
- agent identifier
java.io.IOException
- when an error occurred
private DigestURL extractRedirectURL(Request request, CrawlProfile profile, DigestURL url, StatusLine statusline, ResponseHeader responseHeader, java.lang.String requestURLString) throws java.io.IOException
java.io.IOException
- when an error occurred
private RequestHeader createRequestheader(Request request, ClientIdentification.Agent agent) throws java.io.IOException
request
- search request
agent
- agent identification information
java.io.IOException
- when an error occurred
private Response load(Request request, CrawlProfile profile, int retryCount, int maxFileSize, Blacklist.BlacklistType blacklistType, ClientIdentification.Agent agent) throws java.io.IOException
java.io.IOException
public static Response load(Request request, ClientIdentification.Agent agent) throws java.io.IOException
java.io.IOException
private static Response load(Request request, ClientIdentification.Agent agent, int retryCount) throws java.io.IOException
java.io.IOException