public class Analysis extends Object
Modifier and Type | Field and Description |
---|---|
static com.google.common.collect.ImmutableMap<String,Set<?>> |
namedStopWords |
Constructor and Description |
---|
Analysis() |
Modifier and Type | Method and Description |
---|---|
static boolean |
generatesCharacterTokenStream(org.apache.lucene.analysis.Analyzer analyzer,
String fieldName)
Check whether the TokenStreams generated with analyzer
provide character terms. |
static Reader |
getReaderFromFile(Environment env,
Settings settings,
String settingPrefix) |
static List<String> |
getWordList(Environment env,
Settings settings,
String settingPrefix)
Fetches a list of words from the specified settings file.
|
static org.apache.lucene.analysis.util.CharArraySet |
getWordSet(Environment env,
Settings settings,
String settingsPrefix,
org.apache.lucene.util.Version version) |
static boolean |
isCharacterTokenStream(org.apache.lucene.analysis.TokenStream tokenStream)
Check whether the provided token stream is able to provide character
terms.
|
static boolean |
isNoStopwords(Settings settings) |
static List<String> |
loadWordList(Reader reader,
String comment) |
static org.apache.lucene.util.Version |
parseAnalysisVersion(Settings indexSettings,
Settings settings,
ESLogger logger) |
static org.apache.lucene.analysis.util.CharArraySet |
parseArticles(Environment env,
Settings settings,
org.apache.lucene.util.Version version) |
static org.apache.lucene.analysis.util.CharArraySet |
parseCommonWords(Environment env,
Settings settings,
org.apache.lucene.analysis.util.CharArraySet defaultCommonWords,
org.apache.lucene.util.Version version,
boolean ignoreCase) |
static org.apache.lucene.analysis.util.CharArraySet |
parseStemExclusion(Settings settings,
org.apache.lucene.analysis.util.CharArraySet defaultStemExclusion,
org.apache.lucene.util.Version version) |
static org.apache.lucene.analysis.util.CharArraySet |
parseStopWords(Environment env,
Settings settings,
org.apache.lucene.analysis.util.CharArraySet defaultStopWords,
org.apache.lucene.util.Version version) |
static org.apache.lucene.analysis.util.CharArraySet |
parseStopWords(Environment env,
Settings settings,
org.apache.lucene.analysis.util.CharArraySet defaultStopWords,
org.apache.lucene.util.Version version,
boolean ignoreCase) |
static org.apache.lucene.analysis.util.CharArraySet |
parseWords(Environment env,
Settings settings,
String name,
org.apache.lucene.analysis.util.CharArraySet defaultWords,
com.google.common.collect.ImmutableMap<String,Set<?>> namedWords,
org.apache.lucene.util.Version version,
boolean ignoreCase) |
public static org.apache.lucene.util.Version parseAnalysisVersion(@IndexSettings Settings indexSettings, Settings settings, ESLogger logger)
public static boolean isNoStopwords(Settings settings)
public static org.apache.lucene.analysis.util.CharArraySet parseStemExclusion(Settings settings, org.apache.lucene.analysis.util.CharArraySet defaultStemExclusion, org.apache.lucene.util.Version version)
public static org.apache.lucene.analysis.util.CharArraySet parseWords(Environment env, Settings settings, String name, org.apache.lucene.analysis.util.CharArraySet defaultWords, com.google.common.collect.ImmutableMap<String,Set<?>> namedWords, org.apache.lucene.util.Version version, boolean ignoreCase)
public static org.apache.lucene.analysis.util.CharArraySet parseCommonWords(Environment env, Settings settings, org.apache.lucene.analysis.util.CharArraySet defaultCommonWords, org.apache.lucene.util.Version version, boolean ignoreCase)
public static org.apache.lucene.analysis.util.CharArraySet parseArticles(Environment env, Settings settings, org.apache.lucene.util.Version version)
public static org.apache.lucene.analysis.util.CharArraySet parseStopWords(Environment env, Settings settings, org.apache.lucene.analysis.util.CharArraySet defaultStopWords, org.apache.lucene.util.Version version)
public static org.apache.lucene.analysis.util.CharArraySet parseStopWords(Environment env, Settings settings, org.apache.lucene.analysis.util.CharArraySet defaultStopWords, org.apache.lucene.util.Version version, boolean ignoreCase)
public static org.apache.lucene.analysis.util.CharArraySet getWordSet(Environment env, Settings settings, String settingsPrefix, org.apache.lucene.util.Version version)
public static List<String> getWordList(Environment env, Settings settings, String settingPrefix)
ElasticsearchIllegalArgumentException
- If the word list cannot be found at either key.
public static List<String> loadWordList(Reader reader, String comment) throws IOException
IOException
public static Reader getReaderFromFile(Environment env, Settings settings, String settingPrefix)
null.
ElasticsearchIllegalArgumentException
- If the Reader cannot be instantiated.
public static boolean isCharacterTokenStream(org.apache.lucene.analysis.TokenStream tokenStream)
Although most analyzers generate character terms (CharTermAttribute),
some token streams only contain binary terms (BinaryTermAttribute,
CharTermAttribute being a special type of BinaryTermAttribute), such as
NumericTokenStream, and are therefore unsuitable for highlighting and
more-like-this queries, which expect character terms.
public static boolean generatesCharacterTokenStream(org.apache.lucene.analysis.Analyzer analyzer, String fieldName) throws IOException
Check whether the TokenStreams generated with analyzer
provide character terms.
IOException
isCharacterTokenStream(TokenStream)
Copyright © 2009–2015. All rights reserved.