Lucene++ - a full-featured, c++ search engine
API Documentation


Loading...
Searching...
No Matches
NGramTokenFilter.h
Go to the documentation of this file.
1
2// Copyright (c) 2009-2014 Alan Wright. All rights reserved.
3// Distributable under the terms of either the Apache License (Version 2.0)
4// or the GNU Lesser General Public License.
6
7#ifndef NGRAMTOKENFILTER_H
8#define NGRAMTOKENFILTER_H
9
10#include "TokenFilter.h"
11
12namespace Lucene {
13
/// Tokenizes each input token into n-grams of the given size(s).
///
/// Derives from TokenFilter, i.e. a TokenStream whose input is another TokenStream.
///
/// NOTE(review): this listing skips header lines 27, 29, 34, 37, 41, 43 and 44. The page's
/// cross-reference index places `preserveOriginal` (34), `curTermLength` (37), `state` (41),
/// `termAtt` (43) and `posIncrAtt` (44) on some of those lines; what lines 27 and 29 hold is
/// not visible here -- consult the real header before editing this class.
18class LPPAPI NGramTokenFilter : public TokenFilter {
19public:
    /// NOTE(review): presumably the `preserveOriginal` value used by the constructors that
    /// do not take that argument -- the value itself is defined outside this header; confirm.
20 static const bool DEFAULT_PRESERVE_ORIGINAL;
21
22public:
    /// Constructors. `input` is the upstream TokenStream whose tokens are split into n-grams.
    /// NOTE(review): `gramSize` presumably sets both the minimum and maximum gram size, and
    /// `minGram`/`maxGram` presumably bound the generated gram lengths -- confirm against the
    /// implementation, which is not visible here.
23 NGramTokenFilter(const TokenStreamPtr& input, int32_t gramSize);
24 NGramTokenFilter(const TokenStreamPtr& input, int32_t minGram, int32_t maxGram);
25 NGramTokenFilter(const TokenStreamPtr& input, int32_t minGram, int32_t maxGram, bool preserveOriginal);
26
28
30
31protected:
    // Configured gram-size bounds (names mirror the constructor/init parameters).
32 int32_t minGram;
33 int32_t maxGram;
35
    // Per-token working state. NOTE(review): exact roles are defined by incrementToken()
    // in the implementation file; the names suggest the current term's characters, the
    // current gram size, the current start offset and a position increment -- confirm.
36 CharArray curTermBuffer;
38 int32_t curGramSize;
39 int32_t curPos;
40 int32_t curPosIncr;
42
45
46protected:
    /// Shared initialization helper taking the same settings as the four-argument constructor
    /// (minus `input`). NOTE(review): presumably called by every constructor -- confirm.
47 void init(int32_t minGram, int32_t maxGram, bool preserveOriginal);
48
49public:
    /// Consumers (i.e., IndexWriter) use this method to advance the stream to the next token.
50 virtual bool incrementToken();
    /// Reset the filter as well as the input TokenStream.
51 virtual void reset();
    /// Performs end-of-stream operations, if any, and then calls end() on the input TokenStream.
52 virtual void end();
53};
54
55}
56
57#endif
#define LUCENE_CLASS(Name)
Definition LuceneObject.h:24
Tokenizes each input token into n-grams of the given size(s).
Definition NGramTokenFilter.h:18
NGramTokenFilter(const TokenStreamPtr &input, int32_t minGram, int32_t maxGram)
NGramTokenFilter(const TokenStreamPtr &input, int32_t gramSize)
virtual void end()
Performs end-of-stream operations, if any, and then calls end() on the input TokenStream...
int32_t minGram
Definition NGramTokenFilter.h:32
virtual bool incrementToken()
Consumers (i.e., IndexWriter) use this method to advance the stream to the next token...
TermAttributePtr termAtt
Definition NGramTokenFilter.h:43
NGramTokenFilter(const TokenStreamPtr &input, int32_t minGram, int32_t maxGram, bool preserveOriginal)
AttributeSourceStatePtr state
Definition NGramTokenFilter.h:41
int32_t curGramSize
Definition NGramTokenFilter.h:38
static const bool DEFAULT_PRESERVE_ORIGINAL
Definition NGramTokenFilter.h:20
int32_t curPosIncr
Definition NGramTokenFilter.h:40
int32_t curPos
Definition NGramTokenFilter.h:39
int32_t maxGram
Definition NGramTokenFilter.h:33
bool preserveOriginal
Definition NGramTokenFilter.h:34
CharArray curTermBuffer
Definition NGramTokenFilter.h:36
int32_t curTermLength
Definition NGramTokenFilter.h:37
void init(int32_t minGram, int32_t maxGram, bool preserveOriginal)
PositionIncrementAttributePtr posIncrAtt
Definition NGramTokenFilter.h:44
virtual void reset()
Reset the filter as well as the input TokenStream.
A TokenFilter is a TokenStream whose input is another TokenStream.
Definition TokenFilter.h:18
Definition AbstractAllTermDocs.h:12
boost::shared_ptr< PositionIncrementAttribute > PositionIncrementAttributePtr
Definition LuceneTypes.h:48
boost::shared_ptr< TokenStream > TokenStreamPtr
Definition LuceneTypes.h:66
boost::shared_ptr< TermAttribute > TermAttributePtr
Definition LuceneTypes.h:61
boost::shared_ptr< AttributeSourceState > AttributeSourceStatePtr
Definition LuceneTypes.h:524

clucene.sourceforge.net