7 #ifndef NGRAMTOKENIZER_H
8 #define NGRAMTOKENIZER_H
51 void init(int32_t minGram, int32_t maxGram);
#define LUCENE_CLASS(Name)
Definition LuceneObject.h:24
Tokenizes the input into n-grams of the given size(s).
Definition NGramTokenizer.h:19
int32_t bufferEnd
Definition NGramTokenizer.h:37
int32_t lastCheckedChar
Definition NGramTokenizer.h:43
OffsetAttributePtr offsetAtt
Definition NGramTokenizer.h:47
PositionIncrementAttributePtr posIncrAtt
Definition NGramTokenizer.h:48
bool exhausted
Definition NGramTokenizer.h:42
int32_t gramSize
Definition NGramTokenizer.h:39
int32_t offset
Definition NGramTokenizer.h:38
virtual bool incrementToken()
Consumers (i.e., IndexWriter) use this method to advance the stream to the next token. ...
TermAttributePtr termAtt
Definition NGramTokenizer.h:46
virtual ~NGramTokenizer()
void updateLastNonTokenChar()
virtual void reset()
Resets this stream to the beginning. This is an optional operation, so subclasses may or may not implement ...
NGramTokenizer(const ReaderPtr &input)
int32_t bufferStart
Definition NGramTokenizer.h:36
int32_t maxGram
Definition NGramTokenizer.h:41
NGramTokenizer(const AttributeFactoryPtr &factory, const ReaderPtr &input, int32_t minGram, int32_t maxGram)
int32_t minGram
Definition NGramTokenizer.h:40
virtual void end()
This method is called by the consumer after the last token has been consumed, after incrementToken() ...
CharArray buffer
Definition NGramTokenizer.h:35
virtual void reset(const ReaderPtr &input)
Reset the tokenizer to a new reader. Typically, an analyzer (in its reusableTokenStream method) will ...
static const int32_t DEFAULT_MAX_NGRAM_SIZE
Definition NGramTokenizer.h:22
NGramTokenizer(const AttributeSourcePtr &source, const ReaderPtr &input, int32_t minGram, int32_t maxGram)
void init(int32_t minGram, int32_t maxGram)
virtual bool isTokenChar(wchar_t chr)
Only collect characters which satisfy this condition.
int32_t lastNonTokenChar
Definition NGramTokenizer.h:44
static const int32_t DEFAULT_MIN_NGRAM_SIZE
Definition NGramTokenizer.h:21
NGramTokenizer(const ReaderPtr &input, int32_t minGram, int32_t maxGram)
A Tokenizer is a TokenStream whose input is a Reader.
Definition Tokenizer.h:20
Definition AbstractAllTermDocs.h:12
boost::shared_ptr< AttributeSource > AttributeSourcePtr
Definition LuceneTypes.h:523
boost::shared_ptr< PositionIncrementAttribute > PositionIncrementAttributePtr
Definition LuceneTypes.h:48
boost::shared_ptr< TermAttribute > TermAttributePtr
Definition LuceneTypes.h:61
boost::shared_ptr< OffsetAttribute > OffsetAttributePtr
Definition LuceneTypes.h:43
boost::shared_ptr< Reader > ReaderPtr
Definition LuceneTypes.h:550
boost::shared_ptr< AttributeFactory > AttributeFactoryPtr
Definition LuceneTypes.h:522