final class SimpleTextBKDWriter
extends java.lang.Object
implements java.io.Closeable
Forked from BKDWriter
and simplified/specialized for SimpleText's usage
Modifier and Type | Class and Description |
---|---|
private class |
SimpleTextBKDWriter.OneDimensionBKDWriter |
Modifier and Type | Field and Description |
---|---|
protected int |
bytesPerDim
How many bytes each value in each dimension takes.
|
private int |
bytesPerDoc
How many bytes each doc takes in the fixed-width offline format
|
static java.lang.String |
CODEC_NAME |
(package private) int[] |
commonPrefixLengths |
static float |
DEFAULT_MAX_MB_SORT_IN_HEAP
Default maximum heap to use, before spilling to (slower) disk
|
static int |
DEFAULT_MAX_POINTS_IN_LEAF_NODE
Default maximum number of points in each leaf block
|
protected FixedBitSet |
docsSeen |
private boolean |
finished |
static int |
MAX_DIMS
Maximum number of dimensions
|
private int |
maxDoc |
(package private) double |
maxMBSortInHeap |
protected byte[] |
maxPackedValue
Maximum per-dim values, packed
|
protected int |
maxPointsInLeafNode |
private int |
maxPointsSortInHeap |
protected byte[] |
minPackedValue
Minimum per-dim values, packed
|
protected int |
numDataDims
How many dimensions we are storing at the leaf (data) nodes
|
protected int |
numIndexDims
How many dimensions we are indexing in the internal nodes
|
protected int |
packedBytesLength
numDataDims * bytesPerDim
|
protected int |
packedIndexBytesLength
numIndexDims * bytesPerDim
|
protected long |
pointCount |
private PointWriter |
pointWriter |
(package private) BytesRefBuilder |
scratch |
(package private) byte[] |
scratch1 |
(package private) byte[] |
scratch2 |
(package private) BytesRef |
scratchBytesRef1 |
(package private) BytesRef |
scratchBytesRef2 |
(package private) byte[] |
scratchDiff |
(package private) TrackingDirectoryWrapper |
tempDir |
(package private) java.lang.String |
tempFileNamePrefix |
private IndexOutput |
tempInput |
private long |
totalPointCount
An upper bound on how many points the caller will add (includes deletions)
|
static int |
VERSION_COMPRESSED_DOC_IDS |
static int |
VERSION_COMPRESSED_VALUES |
static int |
VERSION_CURRENT |
static int |
VERSION_IMPLICIT_SPLIT_DIM_1D |
static int |
VERSION_START |
Constructor and Description |
---|
SimpleTextBKDWriter(int maxDoc,
Directory tempDir,
java.lang.String tempFileNamePrefix,
int numDataDims,
int numIndexDims,
int bytesPerDim,
int maxPointsInLeafNode,
double maxMBSortInHeap,
long totalPointCount) |
Modifier and Type | Method and Description |
---|---|
void |
add(byte[] packedValue,
int docID) |
private void |
build(int nodeID,
int leafNodeOffset,
BKDRadixSelector.PathSlice points,
IndexOutput out,
BKDRadixSelector radixSelector,
byte[] minPackedValue,
byte[] maxPackedValue,
byte[] splitPackedValues,
long[] leafBlockFPs,
int[] spareDocIds)
The array (sized numDims) of PathSlice describes the cell we have currently recursed to.
|
private void |
build(int nodeID,
int leafNodeOffset,
MutablePointValues reader,
int from,
int to,
IndexOutput out,
byte[] minPackedValue,
byte[] maxPackedValue,
byte[] splitPackedValues,
long[] leafBlockFPs,
int[] spareDocIds) |
private void |
checkMaxLeafNodeCount(int numLeaves) |
void |
close() |
private void |
computeCommonPrefixLength(HeapPointWriter heapPointWriter,
byte[] commonPrefix) |
long |
finish(IndexOutput out)
Writes the BKD tree to the provided
IndexOutput and returns the file offset where the index was written. |
long |
getPointCount()
How many points have been added so far
|
private void |
newline(IndexOutput out) |
private void |
rotateToTree(int nodeID,
int offset,
int count,
byte[] index,
java.util.List<byte[]> leafBlockStartValues) |
private static int |
runLen(java.util.function.IntFunction<BytesRef> packedValues,
int start,
int end,
int byteOffset) |
protected int |
split(byte[] minPackedValue,
byte[] maxPackedValue) |
private HeapPointWriter |
switchToHeap(PointWriter source)
Pull a partition back into heap once the point count is low enough while recursing.
|
private boolean |
valueInBounds(BytesRef packedValue,
byte[] minPackedValue,
byte[] maxPackedValue)
Called only in assert
|
private boolean |
valueInOrder(long ord,
int sortedDim,
byte[] lastPackedValue,
byte[] packedValue,
int packedValueOffset,
int doc,
int lastDoc) |
private boolean |
valuesInOrderAndBounds(int count,
int sortedDim,
byte[] minPackedValue,
byte[] maxPackedValue,
java.util.function.IntFunction<BytesRef> values,
int[] docs,
int docsOffset) |
private java.lang.Error |
verifyChecksum(java.lang.Throwable priorException,
PointWriter writer)
Called on exception, to check whether the checksum is also corrupt in this source, and add that
information (checksum matched or didn't) as a suppressed exception.
|
static void |
verifyParams(int numDataDims,
int numIndexDims,
int maxPointsInLeafNode,
double maxMBSortInHeap,
long totalPointCount) |
private void |
write(IndexOutput out,
BytesRef b) |
private void |
write(IndexOutput out,
java.lang.String s) |
long |
writeField(IndexOutput out,
java.lang.String fieldName,
MutablePointValues reader)
Write a field from a
MutablePointValues. |
private long |
writeField1Dim(IndexOutput out,
java.lang.String fieldName,
MutablePointValues reader) |
private long |
writeFieldNDims(IndexOutput out,
java.lang.String fieldName,
MutablePointValues values) |
private void |
writeIndex(IndexOutput out,
long[] leafBlockFPs,
byte[] splitPackedValues)
Subclass can change how it writes the index.
|
private void |
writeInt(IndexOutput out,
int x) |
protected void |
writeLeafBlockDocs(IndexOutput out,
int[] docIDs,
int start,
int count) |
protected void |
writeLeafBlockPackedValues(IndexOutput out,
int[] commonPrefixLengths,
int count,
int sortedDim,
java.util.function.IntFunction<BytesRef> packedValues) |
private void |
writeLeafBlockPackedValuesRange(IndexOutput out,
int[] commonPrefixLengths,
int start,
int end,
java.util.function.IntFunction<BytesRef> packedValues) |
private void |
writeLong(IndexOutput out,
long x) |
public static final java.lang.String CODEC_NAME
public static final int VERSION_START
public static final int VERSION_COMPRESSED_DOC_IDS
public static final int VERSION_COMPRESSED_VALUES
public static final int VERSION_IMPLICIT_SPLIT_DIM_1D
public static final int VERSION_CURRENT
private final int bytesPerDoc
public static final int DEFAULT_MAX_POINTS_IN_LEAF_NODE
public static final float DEFAULT_MAX_MB_SORT_IN_HEAP
public static final int MAX_DIMS
protected final int numDataDims
protected final int numIndexDims
protected final int bytesPerDim
protected final int packedBytesLength
protected final int packedIndexBytesLength
final BytesRefBuilder scratch
final TrackingDirectoryWrapper tempDir
final java.lang.String tempFileNamePrefix
final double maxMBSortInHeap
final byte[] scratchDiff
final byte[] scratch1
final byte[] scratch2
final BytesRef scratchBytesRef1
final BytesRef scratchBytesRef2
final int[] commonPrefixLengths
protected final FixedBitSet docsSeen
private PointWriter pointWriter
private boolean finished
private IndexOutput tempInput
protected final int maxPointsInLeafNode
private final int maxPointsSortInHeap
protected final byte[] minPackedValue
protected final byte[] maxPackedValue
protected long pointCount
private final long totalPointCount
private final int maxDoc
public SimpleTextBKDWriter(int maxDoc, Directory tempDir, java.lang.String tempFileNamePrefix, int numDataDims, int numIndexDims, int bytesPerDim, int maxPointsInLeafNode, double maxMBSortInHeap, long totalPointCount) throws java.io.IOException
java.io.IOException
public static void verifyParams(int numDataDims, int numIndexDims, int maxPointsInLeafNode, double maxMBSortInHeap, long totalPointCount)
public void add(byte[] packedValue, int docID) throws java.io.IOException
java.io.IOException
public long getPointCount()
public long writeField(IndexOutput out, java.lang.String fieldName, MutablePointValues reader) throws java.io.IOException
Write a field from a MutablePointValues. This way of writing
points is faster than regular writes with BKDWriter.add(byte[], int)
since there is opportunity for reordering points before writing them to
disk. This method does not use transient disk in order to reorder points.
java.io.IOException
private long writeFieldNDims(IndexOutput out, java.lang.String fieldName, MutablePointValues values) throws java.io.IOException
java.io.IOException
private long writeField1Dim(IndexOutput out, java.lang.String fieldName, MutablePointValues reader) throws java.io.IOException
java.io.IOException
private void rotateToTree(int nodeID, int offset, int count, byte[] index, java.util.List<byte[]> leafBlockStartValues)
private void checkMaxLeafNodeCount(int numLeaves)
public long finish(IndexOutput out) throws java.io.IOException
Writes the BKD tree to the provided IndexOutput
and returns the file offset where the index was written.
java.io.IOException
private void writeIndex(IndexOutput out, long[] leafBlockFPs, byte[] splitPackedValues) throws java.io.IOException
java.io.IOException
protected void writeLeafBlockDocs(IndexOutput out, int[] docIDs, int start, int count) throws java.io.IOException
java.io.IOException
protected void writeLeafBlockPackedValues(IndexOutput out, int[] commonPrefixLengths, int count, int sortedDim, java.util.function.IntFunction<BytesRef> packedValues) throws java.io.IOException
java.io.IOException
private void writeLeafBlockPackedValuesRange(IndexOutput out, int[] commonPrefixLengths, int start, int end, java.util.function.IntFunction<BytesRef> packedValues) throws java.io.IOException
java.io.IOException
private static int runLen(java.util.function.IntFunction<BytesRef> packedValues, int start, int end, int byteOffset)
public void close() throws java.io.IOException
Specified by: close in interface java.io.Closeable
Specified by: close in interface java.lang.AutoCloseable
java.io.IOException
private java.lang.Error verifyChecksum(java.lang.Throwable priorException, PointWriter writer) throws java.io.IOException
java.io.IOException
private boolean valueInBounds(BytesRef packedValue, byte[] minPackedValue, byte[] maxPackedValue)
protected int split(byte[] minPackedValue, byte[] maxPackedValue)
private HeapPointWriter switchToHeap(PointWriter source) throws java.io.IOException
java.io.IOException
private void build(int nodeID, int leafNodeOffset, MutablePointValues reader, int from, int to, IndexOutput out, byte[] minPackedValue, byte[] maxPackedValue, byte[] splitPackedValues, long[] leafBlockFPs, int[] spareDocIds) throws java.io.IOException
java.io.IOException
private void build(int nodeID, int leafNodeOffset, BKDRadixSelector.PathSlice points, IndexOutput out, BKDRadixSelector radixSelector, byte[] minPackedValue, byte[] maxPackedValue, byte[] splitPackedValues, long[] leafBlockFPs, int[] spareDocIds) throws java.io.IOException
java.io.IOException
private void computeCommonPrefixLength(HeapPointWriter heapPointWriter, byte[] commonPrefix)
private boolean valuesInOrderAndBounds(int count, int sortedDim, byte[] minPackedValue, byte[] maxPackedValue, java.util.function.IntFunction<BytesRef> values, int[] docs, int docsOffset) throws java.io.IOException
java.io.IOException
private boolean valueInOrder(long ord, int sortedDim, byte[] lastPackedValue, byte[] packedValue, int packedValueOffset, int doc, int lastDoc)
private void write(IndexOutput out, java.lang.String s) throws java.io.IOException
java.io.IOException
private void writeInt(IndexOutput out, int x) throws java.io.IOException
java.io.IOException
private void writeLong(IndexOutput out, long x) throws java.io.IOException
java.io.IOException
private void write(IndexOutput out, BytesRef b) throws java.io.IOException
java.io.IOException
private void newline(IndexOutput out) throws java.io.IOException
java.io.IOException