public final class IndexCell<ReferenceType extends Reference> extends AbstractBufferedIndex<ReferenceType> implements BufferedIndex<ReferenceType>, java.lang.Iterable<ReferenceContainer<ReferenceType>>
Modifier and Type | Class and Description |
---|---|
private class |
IndexCell.FlushThread |
private static class |
IndexCell.RemoveReducer<ReferenceType extends Reference> |
Modifier and Type | Field and Description |
---|---|
private ReferenceContainerArray<ReferenceType> |
array |
private static long |
cleanupCycle |
private ComparableARC<byte[],java.lang.Integer> |
countCache |
private static long |
dumpCycle |
private boolean |
flushShallRun |
private java.lang.Thread |
flushThread |
private long |
lastCleanup |
private long |
lastDump |
private long |
maxFileSize |
private int |
maxRamEntries |
private IODispatcher |
merger |
private ReferenceContainerCache<ReferenceType> |
ram |
private java.util.Map<byte[],HandleSet> |
removeDelayedURLs |
private long |
targetFileSize |
private int |
writeBufferSize |
factory
Constructor and Description |
---|
IndexCell(java.io.File cellPath,
java.lang.String prefix,
ReferenceFactory<ReferenceType> factory,
ByteOrder termOrder,
int termSize,
int maxRamEntries,
long targetFileSize,
long maxFileSize,
int writeBufferSize,
IODispatcher merger) |
Modifier and Type | Method and Description |
---|---|
void |
add(byte[] termHash,
ReferenceType entry)
add a single reference to the reverse index
if no references to the word are stored, then a new entry is added,
if there are already references to the word hash stored,
then the old and the new references are merged
|
void |
add(ReferenceContainer<ReferenceType> newEntries)
add entries to the cell: this adds the new entries always to the RAM part, never to BLOBs
|
void |
clear()
clear the RAM and BLOB part, deletes everything in the cell
|
void |
clearCache() |
void |
close()
when a cell is closed, the current RAM is dumped to a file which will be opened as
BLOB file the next time a cell is opened.
|
int |
count(byte[] termHash)
count number of references for a given term
this method may cause strong IO load if called too frequently.
|
void |
delete(byte[] termHash)
delete all references for a word
the difference from 'remove' is that the removed element is not returned
|
int |
deleteOld(int minsize,
long maxtime) |
ReferenceContainer<ReferenceType> |
get(byte[] termHash,
HandleSet urlselection)
all containers in the BLOBs and the RAM are merged and returned.
|
long |
getBufferMaxAge()
return the date of the most recent buffer entry
|
int |
getBufferMaxReferences()
return the maximum number of references that one buffer entry has stored
|
long |
getBufferMinAge()
return the date of the oldest buffer entry
|
int |
getBufferSize()
get the size of the buffer content
|
long |
getBufferSizeBytes()
calculate the memory that is taken by the buffer.
|
int |
getSegmentCount() |
boolean |
has(byte[] termHash)
checks if there is any container for this termHash, either in RAM or any BLOB
|
boolean |
isEmpty() |
java.util.Iterator<ReferenceContainer<ReferenceType>> |
iterator() |
private java.util.Collection<byte[]> |
keys4LargeReferences(int minsize,
long maxtime) |
int |
minMem()
calculate needed memory
|
CloneableIterator<ReferenceContainer<ReferenceType>> |
referenceContainerIterator(byte[] startTermHash,
boolean rot,
boolean excludePrivate)
iterate all references from the beginning of a specific word hash
|
CloneableIterator<ReferenceContainer<ReferenceType>> |
referenceContainerIterator(byte[] startTermHash,
boolean rot,
boolean excludePrivate,
boolean ram)
iterate over entries in index.
|
CloneableIterator<Rating<byte[]>> |
referenceCountIterator(byte[] starttermHash,
boolean rot,
boolean excludePrivate)
iterate all references from the beginning of a specific word hash
|
ReferenceContainer<ReferenceType> |
remove(byte[] termHash)
deleting a container affects the containers in RAM and all the BLOB files
the deleted containers are merged and returned as result of the method
|
boolean |
remove(byte[] termHash,
byte[] urlHashBytes)
remove a specific reference entry
|
int |
remove(byte[] termHash,
HandleSet urlHashes)
remove url references from a selected word hash.
|
void |
removeDelayed() |
void |
removeDelayed(byte[] termHash,
byte[] urlHashBytes) |
void |
setBufferMaxWordCount(int maxWords)
set the size of the buffer, which can be defined with a given maximum number
of words that shall be stored.
|
private boolean |
shrink(long targetFileSize,
long maxFileSize) |
int |
size()
the number of all references
|
private int[] |
sizes() |
int |
sizesMax() |
int |
termKeyLength()
every index entry is made for a term which has a fixed size
|
ByteOrder |
termKeyOrdering()
return the order that is used for the storage of the word hashes
|
referenceContainer
merge, query, referenceRow, remove, searchConjunction
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
referenceContainer
merge, referenceRow, remove, searchConjunction
private static final long cleanupCycle
private static final long dumpCycle
private final ReferenceContainerArray<ReferenceType extends Reference> array
private ReferenceContainerCache<ReferenceType extends Reference> ram
private final ComparableARC<byte[],java.lang.Integer> countCache
private int maxRamEntries
private IODispatcher merger
private long lastCleanup
private long lastDump
private final long targetFileSize
private final long maxFileSize
private final int writeBufferSize
private final java.util.Map<byte[],HandleSet> removeDelayedURLs
private boolean flushShallRun
private final java.lang.Thread flushThread
public IndexCell(java.io.File cellPath, java.lang.String prefix, ReferenceFactory<ReferenceType> factory, ByteOrder termOrder, int termSize, int maxRamEntries, long targetFileSize, long maxFileSize, int writeBufferSize, IODispatcher merger) throws java.io.IOException
java.io.IOException
private boolean shrink(long targetFileSize, long maxFileSize)
public int deleteOld(int minsize, long maxtime) throws java.io.IOException
java.io.IOException
private java.util.Collection<byte[]> keys4LargeReferences(int minsize, long maxtime) throws java.io.IOException
java.io.IOException
public int termKeyLength()
termKeyLength
in interface Index<ReferenceType extends Reference>
public void add(ReferenceContainer<ReferenceType> newEntries) throws java.io.IOException, SpaceExceededException
add
in interface Index<ReferenceType extends Reference>
newEntries
- the References to be merged with existing references
java.io.IOException
SpaceExceededException
public void add(byte[] termHash, ReferenceType entry) throws java.io.IOException, SpaceExceededException
Index
add
in interface Index<ReferenceType extends Reference>
java.io.IOException
SpaceExceededException
public boolean has(byte[] termHash)
has
in interface Index<ReferenceType extends Reference>
public int count(byte[] termHash)
count
in interface Index<ReferenceType extends Reference>
public ReferenceContainer<ReferenceType> get(byte[] termHash, HandleSet urlselection) throws java.io.IOException
get
in interface Index<ReferenceType extends Reference>
java.io.IOException
public ReferenceContainer<ReferenceType> remove(byte[] termHash) throws java.io.IOException
remove
in interface Index<ReferenceType extends Reference>
java.io.IOException
public void delete(byte[] termHash) throws java.io.IOException
Index
delete
in interface Index<ReferenceType extends Reference>
java.io.IOException
public void removeDelayed(byte[] termHash, byte[] urlHashBytes)
removeDelayed
in interface Index<ReferenceType extends Reference>
public void removeDelayed() throws java.io.IOException
removeDelayed
in interface Index<ReferenceType extends Reference>
java.io.IOException
public int remove(byte[] termHash, HandleSet urlHashes) throws java.io.IOException
remove
in interface Index<ReferenceType extends Reference>
termHash
- the key for the references
java.io.IOException
public boolean remove(byte[] termHash, byte[] urlHashBytes) throws java.io.IOException
Index
remove
in interface Index<ReferenceType extends Reference>
urlHashBytes
- the key for the reference entry to be removed
java.io.IOException
public java.util.Iterator<ReferenceContainer<ReferenceType>> iterator()
iterator
in interface java.lang.Iterable<ReferenceContainer<ReferenceType extends Reference>>
public CloneableIterator<Rating<byte[]>> referenceCountIterator(byte[] starttermHash, boolean rot, boolean excludePrivate)
Index
referenceCountIterator
in interface Index<ReferenceType extends Reference>
rot
- if true, then rotate at the end to the beginning
public CloneableIterator<ReferenceContainer<ReferenceType>> referenceContainerIterator(byte[] startTermHash, boolean rot, boolean excludePrivate)
Index
referenceContainerIterator
in interface Index<ReferenceType extends Reference>
rot
- if true, then rotate at the end to the beginning
public CloneableIterator<ReferenceContainer<ReferenceType>> referenceContainerIterator(byte[] startTermHash, boolean rot, boolean excludePrivate, boolean ram)
BufferedIndex
referenceContainerIterator
in interface BufferedIndex<ReferenceType extends Reference>
public void clear() throws java.io.IOException
clear
in interface Index<ReferenceType extends Reference>
java.io.IOException
public void clearCache()
public void close()
close
in interface Index<ReferenceType extends Reference>
public boolean isEmpty()
public int size()
Index
size
in interface Index<ReferenceType extends Reference>
private int[] sizes()
public int sizesMax()
public int getSegmentCount()
public int minMem()
Index
minMem
in interface Index<ReferenceType extends Reference>
public ByteOrder termKeyOrdering()
Index
termKeyOrdering
in interface Index<ReferenceType extends Reference>
public long getBufferMaxAge()
BufferedIndex
getBufferMaxAge
in interface BufferedIndex<ReferenceType extends Reference>
public int getBufferMaxReferences()
BufferedIndex
getBufferMaxReferences
in interface BufferedIndex<ReferenceType extends Reference>
public long getBufferMinAge()
BufferedIndex
getBufferMinAge
in interface BufferedIndex<ReferenceType extends Reference>
public int getBufferSize()
BufferedIndex
getBufferSize
in interface BufferedIndex<ReferenceType extends Reference>
public long getBufferSizeBytes()
BufferedIndex
getBufferSizeBytes
in interface BufferedIndex<ReferenceType extends Reference>
public void setBufferMaxWordCount(int maxWords)
BufferedIndex
setBufferMaxWordCount
in interface BufferedIndex<ReferenceType extends Reference>