java.lang.Object
org.apache.lucene.analysis.en.KStemmer
This class implements the Kstem algorithm
-
Nested Class Summary
Nested Classes -
Field Summary
Fields: Modifier and Type | Field | Description
private static char[]
private static final String[][]
private static final CharArrayMap<KStemmer.DictEntry>
private static final String[][]
private static final String[]
private static char[]
private static char[]
private static char[]
private int
private int
(package private) KStemmer.DictEntry
private static final int
private static final String[]
(package private) String
private static final String[]
private final OpenStringBuilder
caching off: private int maxCacheSize; private CharArrayMap<String> cache = null; private static final String SAME = "SAME"; // use if stemmed form is the same -
Constructor Summary
Constructors -
Method Summary
Modifier and Type | Method | Description
private void
(package private) CharSequence
private void
aspect()
(package private) String
asString()
Returns the result of the stem (assuming the word was changed) as a String.
private void
private boolean
doubleC
(int i) private boolean
endsIn
(char[] s) private boolean
endsIn
(char a, char b) private boolean
endsIn
(char a, char b, char c) private boolean
endsIn
(char a, char b, char c, char d) private void
(package private) char[]
getChars()
(package private) int
(package private) String
private void
private static CharArrayMap<KStemmer.DictEntry>
private void
private boolean
isAlpha
(char ch) private boolean
isCons
(int index) private void
private boolean
isVowel
(int index) private void
private void
private void
private boolean
lookup()
private void
private boolean
matched()
private void
private void
private void
private void
private void
private char
private void
plural()
private void
private void
(package private) boolean
stem
(char[] term, int len) Stems the text in the token.
(package private) String
private int
private boolean
private KStemmer.DictEntry
-
Field Details
-
MaxWordLen
private static final int MaxWordLen - See Also:
-
exceptionWords
-
directConflations
-
countryNationality
-
supplementDict
-
properNouns
-
dict_ht
-
word
caching off: private int maxCacheSize; private CharArrayMap<String> cache = null; private static final String SAME = "SAME"; // use if stemmed form is the same -
j
private int j -
k
private int k -
matchedEntry
KStemmer.DictEntry matchedEntry -
ization
private static char[] ization -
ition
private static char[] ition -
ation
private static char[] ation -
ication
private static char[] ication -
result
String result
-
-
Constructor Details
-
KStemmer
KStemmer()
-
-
Method Details
-
penultChar
private char penultChar() -
isVowel
private boolean isVowel(int index) -
isCons
private boolean isCons(int index) -
initializeDictHash
-
isAlpha
private boolean isAlpha(char ch) -
stemLength
private int stemLength() -
endsIn
private boolean endsIn(char[] s) -
endsIn
private boolean endsIn(char a, char b) -
endsIn
private boolean endsIn(char a, char b, char c) -
endsIn
private boolean endsIn(char a, char b, char c, char d) -
wordInDict
-
plural
private void plural() -
setSuffix
-
setSuff
-
lookup
private boolean lookup() -
pastTense
private void pastTense() -
doubleC
private boolean doubleC(int i) -
vowelInStem
private boolean vowelInStem() -
aspect
private void aspect() -
ityEndings
private void ityEndings() -
nceEndings
private void nceEndings() -
nessEndings
private void nessEndings() -
ismEndings
private void ismEndings() -
mentEndings
private void mentEndings() -
izeEndings
private void izeEndings() -
ncyEndings
private void ncyEndings() -
bleEndings
private void bleEndings() -
icEndings
private void icEndings() -
ionEndings
private void ionEndings() -
erAndOrEndings
private void erAndOrEndings() -
lyEndings
private void lyEndings() -
alEndings
private void alEndings() -
iveEndings
private void iveEndings() -
stem
-
asString
String asString() Returns the result of the stem (assuming the word was changed) as a String. -
asCharSequence
CharSequence asCharSequence() -
getString
String getString() -
getChars
char[] getChars() -
getLength
int getLength() -
matched
private boolean matched() -
stem
boolean stem(char[] term, int len) Stems the text in the token. Returns true if changed.
-