public class Response
extends java.lang.Object
Modifier and Type | Field and Description |
---|---|
private byte[] |
content |
static long |
CRAWLER_MAX_SIZE_TO_CACHE
Maximum file size to put in cache for crawler
|
static char |
DT_AUDIO |
static char |
DT_BINARY |
static char |
DT_DOC |
static char |
DT_FLASH |
static char |
DT_HTML |
static char |
DT_IMAGE |
static char |
DT_MOVIE |
static char |
DT_PDFPS |
static char |
DT_SHARE |
static char |
DT_TEXT |
static char |
DT_UNKNOWN |
private boolean |
fromCache |
private CrawlProfile |
profile |
static int |
QUEUE_STATE_CONDENSING |
static int |
QUEUE_STATE_FINISHED |
static int |
QUEUE_STATE_FRESH |
static int |
QUEUE_STATE_INDEXSTORAGE |
static int |
QUEUE_STATE_PARSING |
static int |
QUEUE_STATE_STRUCTUREANALYSIS |
private Request |
request |
private RequestHeader |
requestHeader |
private ResponseHeader |
responseHeader |
private int |
status |
Constructor and Description |
---|
Response(Request request,
CrawlProfile profile)
Creates a 'virtual' response that is composed using crawl details from the request object.
This is used when the NOLOAD queue is processed.
|
Response(Request request,
RequestHeader requestHeader,
ResponseHeader responseHeader,
CrawlProfile profile,
boolean fromCache,
byte[] content) |
Modifier and Type | Method and Description |
---|---|
int |
depth() |
char |
docType() |
static char |
docType(MultiProtocolURL url)
doctype calculation based on file extensions; this is the url wrapper
|
static char |
docType(java.lang.String mime)
doctype calculation based on the mime type
|
static java.lang.String[] |
doctype2mime(java.lang.String ext,
char doctype)
reverse mime type calculation; this is just a heuristic
|
static char |
docTypeExt(java.lang.String ext)
doctype calculation by file extension
TODO: this must be enhanced with a more generic way of configuration
|
boolean |
fromCache() |
java.lang.String |
getCharacterEncoding() |
byte[] |
getContent() |
java.lang.String |
getMimeType()
Get the MIME type from the HTTP header, or null if unknown (not included in the response header)
|
ResponseHeader |
getResponseHeader() |
int |
getStatus() |
java.util.Date |
ifModifiedSince() |
byte[] |
initiator() |
boolean |
isFreshForProxy()
Decide, based on header information, whether a specific file should be taken from
the cache or not.
|
java.util.Date |
lastModified()
Get the response header's last-modified date;
if it is missing, the first-seen date or the current date is used
|
java.lang.String |
name() |
Document[] |
parse() |
ResultURLs.EventOrigin |
processCase(java.lang.String mySeedHash) |
CrawlProfile |
profile() |
boolean |
proxy() |
byte[] |
referrerHash() |
DigestURL |
referrerURL() |
boolean |
requestProhibitsIndexing() |
boolean |
requestWithCookie() |
void |
setContent(byte[] data) |
java.lang.String |
shallIndexCacheForCrawler()
Decide, based on header information, whether a specific file should be indexed.
This method returns null if the answer is 'YES'!
If the answer is 'NO' (do not index), it returns a string with the reason
to reject the crawling demand in clear text.
This function is used by plasmaSwitchboard#processResourceStack
|
java.lang.String |
shallIndexCacheForProxy()
Decide, based on header information, whether a specific file should be indexed.
This method returns null if the answer is 'YES'!
If the answer is 'NO' (do not index), it returns a string with the reason
to reject the crawling demand in clear text.
This function is used by plasmaSwitchboard#processResourceStack
|
java.lang.String |
shallStoreCacheForCrawler() |
java.lang.String |
shallStoreCacheForProxy() |
long |
size() |
void |
updateStatus(int newStatus) |
DigestURL |
url() |
boolean |
validResponseStatus() |
public static final char DT_PDFPS
public static final char DT_TEXT
public static final char DT_HTML
public static final char DT_DOC
public static final char DT_IMAGE
public static final char DT_MOVIE
public static final char DT_FLASH
public static final char DT_SHARE
public static final char DT_AUDIO
public static final char DT_BINARY
public static final char DT_UNKNOWN
private final Request request
private final RequestHeader requestHeader
private final ResponseHeader responseHeader
private final CrawlProfile profile
private byte[] content
private int status
private final boolean fromCache
public static final long CRAWLER_MAX_SIZE_TO_CACHE
public static final int QUEUE_STATE_FRESH
public static final int QUEUE_STATE_PARSING
public static final int QUEUE_STATE_CONDENSING
public static final int QUEUE_STATE_STRUCTUREANALYSIS
public static final int QUEUE_STATE_INDEXSTORAGE
public static final int QUEUE_STATE_FINISHED
public Response(Request request, RequestHeader requestHeader, ResponseHeader responseHeader, CrawlProfile profile, boolean fromCache, byte[] content)
public Response(Request request, CrawlProfile profile)
request
- profile
- public static char docTypeExt(java.lang.String ext)
ext
- public static char docType(MultiProtocolURL url)
url
- public static char docType(java.lang.String mime)
mime
- public static java.lang.String[] doctype2mime(java.lang.String ext, char doctype)
ext
- doctype
- public void updateStatus(int newStatus)
public ResponseHeader getResponseHeader()
public boolean fromCache()
public int getStatus()
public java.lang.String name()
public DigestURL url()
public char docType()
public java.util.Date lastModified()
public CrawlProfile profile()
public byte[] initiator()
public boolean proxy()
public long size()
public int depth()
public void setContent(byte[] data)
public byte[] getContent()
public java.lang.String shallStoreCacheForProxy()
public java.lang.String shallStoreCacheForCrawler()
public boolean isFreshForProxy()
public final java.lang.String shallIndexCacheForProxy()
public final java.lang.String shallIndexCacheForCrawler()
public java.lang.String getMimeType()
public java.lang.String getCharacterEncoding()
public DigestURL referrerURL()
public byte[] referrerHash()
public boolean validResponseStatus()
public java.util.Date ifModifiedSince()
public boolean requestWithCookie()
public boolean requestProhibitsIndexing()
public ResultURLs.EventOrigin processCase(java.lang.String mySeedHash)
public Document[] parse() throws Parser.Failure
Parser.Failure