public class WebgraphConfiguration extends SchemaConfiguration implements java.io.Serializable
Configuration.Entry
Modifier and Type | Field and Description |
---|---|
private static long |
serialVersionUID |
lazy
Constructor and Description |
---|
WebgraphConfiguration(boolean lazy)
Initialize with an empty ConfigurationSet, which causes all index
attributes to be used.
|
WebgraphConfiguration(java.io.File configurationFile,
boolean lazy)
Initialize the schema with a given configuration file.
The configuration file simply contains a list of lines, each holding either a keyword
or a keyword = value pair (where value is a custom Solr field name).
|
Modifier and Type | Method and Description |
---|---|
void |
commit()
Save the configuration to file and update the enum SolrFields.
|
SolrInputDocument |
getEdge(CollectionConfiguration.Subgraph subgraph,
DigestURL source_url,
ResponseHeader responseHeader,
java.util.Map<java.lang.String,java.util.regex.Pattern> collections,
int crawldepth_source,
java.util.Set<ProcessType> processTypes,
java.lang.String sourceName,
boolean allAttr,
boolean generalNofollow,
int target_order,
AnchorURL target_url) |
java.util.List<SolrInputDocument> |
getEdges(CollectionConfiguration.Subgraph subgraph,
DigestURL source,
ResponseHeader responseHeader,
java.util.Map<java.lang.String,java.util.regex.Pattern> collections,
int crawldepth_source,
java.util.Set<ProcessType> processTypes,
java.util.Collection<AnchorURL> links,
java.lang.String sourceName) |
private static int |
relEval(java.lang.String rels)
Encode a string containing attributes from anchor rel properties as binary flags:
bit 0: "me" contained in rel
bit 1: "nofollow" contained in rel
|
SolrInputDocument |
toSolrInputDocument(SolrDocument doc)
Convert a SolrDocument to a SolrInputDocument.
|
add, add, add, add, add, add, add, add, add, contains, fill, getDate, remove, remove, toSolrDocument, toSolrInputDocument
add, add, add, contains, containsDisabled, entryIterator, main
ceilingEntry, ceilingKey, clear, clone, comparator, containsKey, containsValue, descendingKeySet, descendingMap, entrySet, firstEntry, firstKey, floorEntry, floorKey, get, headMap, headMap, higherEntry, higherKey, keySet, lastEntry, lastKey, lowerEntry, lowerKey, navigableKeySet, pollFirstEntry, pollLastEntry, put, putAll, remove, size, subMap, subMap, tailMap, tailMap, values
private static final long serialVersionUID
public WebgraphConfiguration(boolean lazy)
public WebgraphConfiguration(java.io.File configurationFile, boolean lazy) throws java.io.IOException
configurationFile -
java.io.IOException
public java.util.List<SolrInputDocument> getEdges(CollectionConfiguration.Subgraph subgraph, DigestURL source, ResponseHeader responseHeader, java.util.Map<java.lang.String,java.util.regex.Pattern> collections, int crawldepth_source, java.util.Set<ProcessType> processTypes, java.util.Collection<AnchorURL> links, java.lang.String sourceName)
public SolrInputDocument getEdge(CollectionConfiguration.Subgraph subgraph, DigestURL source_url, ResponseHeader responseHeader, java.util.Map<java.lang.String,java.util.regex.Pattern> collections, int crawldepth_source, java.util.Set<ProcessType> processTypes, java.lang.String sourceName, boolean allAttr, boolean generalNofollow, int target_order, AnchorURL target_url)
private static int relEval(java.lang.String rels)
rel -
public void commit() throws java.io.IOException
commit
in class Configuration
java.io.IOException
public SolrInputDocument toSolrInputDocument(SolrDocument doc)
toSolrInputDocument
in class SchemaConfiguration
doc
- the solr document