public final class Fulltext
extends java.lang.Object
Modifier and Type | Class and Description |
---|---|
class |
Fulltext.Export |
static class |
Fulltext.ExportFormat |
Modifier and Type | Field and Description |
---|---|
private java.io.File |
archivePath |
private CollectionConfiguration |
collectionConfiguration |
private long |
collectionSizeLastAccess
get the size of the default index
|
private long |
collectionSizeLastValue |
private Fulltext.Export |
exportthread |
private long |
lastCommit |
private java.io.File |
segmentPath |
private static java.lang.String[] |
SOLR_OLD_PATH |
private static java.lang.String |
SOLR_PATH |
private InstanceMirror |
solrInstances |
private WebgraphConfiguration |
webgraphConfiguration |
private boolean |
writeWebgraph |
static java.lang.String |
yacy_dump_prefix |
Modifier | Constructor and Description |
---|---|
protected |
Fulltext(java.io.File segmentPath,
java.io.File archivePath,
CollectionConfiguration collectionConfiguration,
WebgraphConfiguration webgraphConfiguration) |
Modifier and Type | Method and Description |
---|---|
int |
bufferSize() |
void |
clearCaches() |
void |
clearLocalSolr() |
void |
clearRemoteSolr() |
void |
close() |
long |
collectionSize() |
void |
commit(boolean softCommit) |
boolean |
connectedLocalSolr() |
boolean |
connectedRemoteSolr() |
void |
connectLocalSolr() |
void |
connectRemoteSolr(java.util.ArrayList<RemoteInstance> instances,
ShardSelection.Method shardMethod,
boolean writeEnabled) |
void |
deleteDomainErrors(java.util.Set<java.lang.String> hosthashes)
delete all documents within a domain that are registered as error documents
|
private static void |
deleteDomainWithConstraint(SolrConnector connector,
java.lang.String fieldname,
java.util.Set<java.lang.String> hosthashes,
java.lang.String constraintQuery) |
void |
deleteOldDocuments(long deltaToNow,
boolean loaddate) |
void |
deleteStaleDomainHashes(java.util.Set<java.lang.String> hosthashes,
java.util.Date freshdate)
Using a fragment of the url hash (6 bytes: bytes 6 to 11), it is possible to address all urls from a specific domain.
Here, such a fragment can be used to delete all these domains at once.
|
void |
deleteStaleDomainNames(java.util.Set<java.lang.String> hostnames,
java.util.Date freshdate) |
void |
disconnectLocalSolr() |
void |
disconnectRemoteSolr() |
java.util.List<java.io.File> |
dumpFiles() |
java.io.File |
dumpSolr()
create a dump file from the current solr directory
|
Fulltext.Export |
export() |
Fulltext.Export |
export(java.io.File f,
java.lang.String filter,
java.lang.String query,
Fulltext.ExportFormat format,
boolean dom,
boolean text) |
Fulltext.Export |
export(Fulltext.ExportFormat format,
java.lang.String filter,
java.lang.String query,
int maxseconds,
java.io.File path,
boolean dom,
boolean text) |
SolrConnector |
getConnectorForRead(java.lang.String corename) |
CollectionConfiguration |
getDefaultConfiguration() |
SolrConnector |
getDefaultConnector() |
EmbeddedSolrConnector |
getDefaultEmbeddedConnector() |
RemoteSolrConnector |
getDefaultRemoteSolrConnector() |
EmbeddedSolrConnector |
getEmbeddedConnector(java.lang.String corename) |
EmbeddedInstance |
getEmbeddedInstance() |
long |
getLoadTime(java.lang.String urlHash)
get the load time of a resource.
|
URIMetadataNode |
getMetadata(byte[] urlHash) |
private URIMetadataNode |
getMetadata(byte[] urlHash,
WordReferenceVars wre,
float score) |
URIMetadataNode |
getMetadata(WeakPriorityBlockingQueue.Element<WordReferenceVars> element) |
java.util.Map<java.lang.String,SolrInfoMBean> |
getSolrInfoBeans() |
DigestURL |
getURL(java.lang.String urlHash) |
WebgraphConfiguration |
getWebgraphConfiguration() |
SolrConnector |
getWebgraphConnector() |
void |
optimize(int size)
optimize solr (experimental to check resource management)
|
void |
putDocument(SolrInputDocument doc) |
void |
putEdges(java.util.Collection<SolrInputDocument> edges) |
void |
putMetadata(URIMetadataNode entry)
deprecated method to store document metadata, use Solr documents wherever possible
|
void |
rebootSolr()
reboot solr (experimental to check resource management)
|
boolean |
remove(byte[] urlHash) |
void |
remove(java.util.Collection<java.lang.String> deleteIDs)
remove a list of ids from the index
|
int |
remove(java.lang.String basepath,
java.util.Date freshdate)
remove a full subpath from the index
|
void |
restoreSolr(java.io.File solrDumpZipFile)
restore a solr dump to the current solr directory
|
void |
setUseWebgraph(boolean check) |
boolean |
useWebgraph() |
long |
webgraphSize()
get the size of the webgraph index
|
private static final java.lang.String SOLR_PATH
private static final java.lang.String[] SOLR_OLD_PATH
private final java.io.File segmentPath
private final java.io.File archivePath
private Fulltext.Export exportthread
private InstanceMirror solrInstances
private final CollectionConfiguration collectionConfiguration
private final WebgraphConfiguration webgraphConfiguration
private boolean writeWebgraph
private long collectionSizeLastAccess
private long collectionSizeLastValue
private long lastCommit
public static final java.lang.String yacy_dump_prefix
protected Fulltext(java.io.File segmentPath, java.io.File archivePath, CollectionConfiguration collectionConfiguration, WebgraphConfiguration webgraphConfiguration)
public void setUseWebgraph(boolean check)
public boolean useWebgraph()
public CollectionConfiguration getDefaultConfiguration()
public WebgraphConfiguration getWebgraphConfiguration()
public boolean connectedLocalSolr()
public void connectLocalSolr() throws java.io.IOException
java.io.IOException
public void disconnectLocalSolr()
public boolean connectedRemoteSolr()
public void connectRemoteSolr(java.util.ArrayList<RemoteInstance> instances, ShardSelection.Method shardMethod, boolean writeEnabled)
public void disconnectRemoteSolr()
public EmbeddedSolrConnector getDefaultEmbeddedConnector()
public EmbeddedSolrConnector getEmbeddedConnector(java.lang.String corename)
public SolrConnector getConnectorForRead(java.lang.String corename)
public RemoteSolrConnector getDefaultRemoteSolrConnector()
public EmbeddedInstance getEmbeddedInstance()
public SolrConnector getDefaultConnector()
public SolrConnector getWebgraphConnector()
public java.util.Map<java.lang.String,SolrInfoMBean> getSolrInfoBeans()
public int bufferSize()
public void clearCaches()
public void clearLocalSolr() throws java.io.IOException
java.io.IOException
public void clearRemoteSolr() throws java.io.IOException
java.io.IOException
public long collectionSize()
public long webgraphSize()
public void close()
public void commit(boolean softCommit)
public URIMetadataNode getMetadata(WeakPriorityBlockingQueue.Element<WordReferenceVars> element)
public URIMetadataNode getMetadata(byte[] urlHash)
private URIMetadataNode getMetadata(byte[] urlHash, WordReferenceVars wre, float score)
public void putDocument(SolrInputDocument doc) throws java.io.IOException
java.io.IOException
public void putEdges(java.util.Collection<SolrInputDocument> edges) throws java.io.IOException
java.io.IOException
public void putMetadata(URIMetadataNode entry) throws java.io.IOException
java.io.IOException
public void deleteStaleDomainHashes(java.util.Set<java.lang.String> hosthashes, java.util.Date freshdate)
hosthashes
- the hashes of the hosts to be deleted
freshdate
- either NULL or a date in the past which is the limit for deletion. Only documents older than this date are deleted
java.io.IOException
public void deleteStaleDomainNames(java.util.Set<java.lang.String> hostnames, java.util.Date freshdate)
public void deleteDomainErrors(java.util.Set<java.lang.String> hosthashes)
hosthashes
-
private static void deleteDomainWithConstraint(SolrConnector connector, java.lang.String fieldname, java.util.Set<java.lang.String> hosthashes, java.lang.String constraintQuery)
public void deleteOldDocuments(long deltaToNow, boolean loaddate)
public int remove(java.lang.String basepath, java.util.Date freshdate)
subpath
- the left path of the url; at least until the end of the host
freshdate
- either NULL or a date in the past which is the limit for deletion. Only documents older than this date are deleted
concurrently
- if true, then the method returns immediately and runs concurrently
public void remove(java.util.Collection<java.lang.String> deleteIDs)
deleteIDs
- a list of urlhashes; each denoting a document
concurrently
- if true, then the method returns immediately and runs concurrently
public boolean remove(byte[] urlHash)
public DigestURL getURL(java.lang.String urlHash) throws java.io.IOException
java.io.IOException
public long getLoadTime(java.lang.String urlHash) throws java.io.IOException
urlHash
-
java.io.IOException
public java.util.List<java.io.File> dumpFiles()
public java.io.File dumpSolr()
public void restoreSolr(java.io.File solrDumpZipFile)
solrDumpZipFile
-
public void optimize(int size)
size
-
public void rebootSolr()
public Fulltext.Export export(Fulltext.ExportFormat format, java.lang.String filter, java.lang.String query, int maxseconds, java.io.File path, boolean dom, boolean text) throws java.io.IOException
java.io.IOException
public Fulltext.Export export(java.io.File f, java.lang.String filter, java.lang.String query, Fulltext.ExportFormat format, boolean dom, boolean text)
public Fulltext.Export export()