org.apache.uima.conceptMapper.support.tokens
Class TokenFilter

java.lang.Object
  extended by org.apache.uima.conceptMapper.support.tokens.TokenFilter

public class TokenFilter
extends java.lang.Object


Field Summary
protected  java.util.HashSet<java.lang.String> excludedTokenClasses
           
protected  java.util.HashSet<java.lang.Integer> excludedTokenTypes
           
protected  java.util.HashSet<java.lang.String> includedTokenClasses
           
protected  java.util.HashSet<java.lang.Integer> includedTokenTypes
           
static java.lang.String PARAM_EXCLUDEDTOKENCLASSES
          Configuration parameter for list of token classes to exclude from lookups
static java.lang.String PARAM_EXCLUDEDTOKENTYPES
          Configuration parameter for list of token types to exclude from lookups
static java.lang.String PARAM_INCLUDEDTOKENCLASSES
          Configuration parameter for list of token classes to include in lookups
static java.lang.String PARAM_INCLUDEDTOKENTYPES
          Configuration parameter for list of token types to include in lookups
static java.lang.String PARAM_STOPWORDS
           
static java.lang.String PARAM_TOKENANNOTATION
          Configuration parameter giving type of tokens
 
Constructor Summary
TokenFilter(java.lang.String tokenAnnotationName, java.lang.String tokenTypeFeatureName, java.lang.String tokenClassFeatureName, Logger logger)
           
 
Method Summary
 boolean checkTokenClass(org.apache.uima.cas.text.AnnotationFS token)
           
 boolean checkTokenClass(DictionaryToken token)
           
 boolean checkTokenType(org.apache.uima.cas.text.AnnotationFS token)
           
 boolean checkTokenType(DictionaryToken token)
           
 java.lang.String getTokenAnnotationName()
           
 org.apache.uima.cas.Feature getTokenClassFeature()
           
 java.lang.String getTokenClassFeatureName()
           
 org.apache.uima.cas.Feature getTokenTypeFeature()
           
 java.lang.String getTokenTypeFeatureName()
           
 void initConfig(org.apache.uima.analysis_engine.annotator.AnnotatorContext annotatorContext)
           
static java.util.Set<java.lang.String> initializeStopWordList(java.lang.String[] stopWordsStrings)
           
 void initTypes(org.apache.uima.cas.TypeSystem typeSystem)
           
 void initTypes(org.apache.uima.cas.TypeSystem typeSystem, boolean requireFeatureExistence)
           
 boolean isOK_Token(org.apache.uima.cas.text.AnnotationFS token, TokenNormalizer tokenNormalizer)
           
 boolean isOK_Token(DictionaryToken token, TokenNormalizer tokenNormalizer)
           
static boolean isStopWord(java.util.Set<java.lang.String> stopWords, java.lang.String tokenText)
           
 boolean isStopWord(java.lang.String tokenText)
           
 void setTokenAnnotationName(java.lang.String tokenAnnotationName)
           
 void setTokenClassFeature(org.apache.uima.cas.Feature tokenClassFeature)
           
 void setTokenClassFeatureName(java.lang.String tokenClassFeatureName)
           
 void setTokenTypeFeature(org.apache.uima.cas.Feature tokenTypeFeature)
           
 void setTokenTypeFeatureName(java.lang.String tokenTypeFeatureName)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

PARAM_INCLUDEDTOKENCLASSES

public static final java.lang.String PARAM_INCLUDEDTOKENCLASSES
Configuration parameter for list of token classes to include in lookups

See Also:
Constant Field Values

includedTokenClasses

protected java.util.HashSet<java.lang.String> includedTokenClasses

PARAM_EXCLUDEDTOKENCLASSES

public static final java.lang.String PARAM_EXCLUDEDTOKENCLASSES
Configuration parameter for list of token classes to exclude from lookups

See Also:
Constant Field Values

excludedTokenClasses

protected java.util.HashSet<java.lang.String> excludedTokenClasses

PARAM_INCLUDEDTOKENTYPES

public static final java.lang.String PARAM_INCLUDEDTOKENTYPES
Configuration parameter for list of token types to include in lookups

See Also:
Constant Field Values

includedTokenTypes

protected java.util.HashSet<java.lang.Integer> includedTokenTypes

PARAM_EXCLUDEDTOKENTYPES

public static final java.lang.String PARAM_EXCLUDEDTOKENTYPES
Configuration parameter for list of token types to exclude from lookups

See Also:
Constant Field Values

excludedTokenTypes

protected java.util.HashSet<java.lang.Integer> excludedTokenTypes

PARAM_STOPWORDS

public static final java.lang.String PARAM_STOPWORDS
See Also:
Constant Field Values

PARAM_TOKENANNOTATION

public static final java.lang.String PARAM_TOKENANNOTATION
Configuration parameter giving type of tokens

See Also:
Constant Field Values
Constructor Detail

TokenFilter

public TokenFilter(java.lang.String tokenAnnotationName,
                   java.lang.String tokenTypeFeatureName,
                   java.lang.String tokenClassFeatureName,
                   Logger logger)
Method Detail

getTokenClassFeatureName

public java.lang.String getTokenClassFeatureName()

setTokenClassFeatureName

public void setTokenClassFeatureName(java.lang.String tokenClassFeatureName)

getTokenClassFeature

public org.apache.uima.cas.Feature getTokenClassFeature()

setTokenClassFeature

public void setTokenClassFeature(org.apache.uima.cas.Feature tokenClassFeature)

getTokenTypeFeatureName

public java.lang.String getTokenTypeFeatureName()

setTokenTypeFeatureName

public void setTokenTypeFeatureName(java.lang.String tokenTypeFeatureName)

getTokenTypeFeature

public org.apache.uima.cas.Feature getTokenTypeFeature()

setTokenTypeFeature

public void setTokenTypeFeature(org.apache.uima.cas.Feature tokenTypeFeature)

getTokenAnnotationName

public java.lang.String getTokenAnnotationName()

setTokenAnnotationName

public void setTokenAnnotationName(java.lang.String tokenAnnotationName)

initConfig

public void initConfig(org.apache.uima.analysis_engine.annotator.AnnotatorContext annotatorContext)
                throws org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException
Throws:
org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException

initializeStopWordList

public static java.util.Set<java.lang.String> initializeStopWordList(java.lang.String[] stopWordsStrings)
                                                              throws org.apache.uima.analysis_engine.annotator.AnnotatorContextException
Throws:
org.apache.uima.analysis_engine.annotator.AnnotatorContextException

checkTokenClass

public boolean checkTokenClass(org.apache.uima.cas.text.AnnotationFS token)
Parameters:
token - token whose tokenClass is to be looked up
Returns:
true if the token's class is in includedTokenClasses, or if both includedTokenClasses and excludedTokenClasses are unset, or if excludedTokenClasses does not contain an entry for the token's class

checkTokenClass

public boolean checkTokenClass(DictionaryToken token)

isStopWord

public static boolean isStopWord(java.util.Set<java.lang.String> stopWords,
                                 java.lang.String tokenText)

isStopWord

public boolean isStopWord(java.lang.String tokenText)

checkTokenType

public boolean checkTokenType(org.apache.uima.cas.text.AnnotationFS token)
Parameters:
token -
Returns:
false if tokenTypeFeature is set, and the token's tokenTypeFeature slot is set, but the value is not OK

checkTokenType

public boolean checkTokenType(DictionaryToken token)

initTypes

public void initTypes(org.apache.uima.cas.TypeSystem typeSystem)
               throws UnknownTypeException
Throws:
UnknownTypeException

initTypes

public void initTypes(org.apache.uima.cas.TypeSystem typeSystem,
                      boolean requireFeatureExistence)
               throws UnknownTypeException
Parameters:
typeSystem -
requireFeatureExistence - if true, then the tokenType and/or tokenClass features of the tokenAnnotation must exist whenever they are specified. This flag allows for the situation where these features might not exist during dictionary loading, but are needed at annotator runtime
Throws:
UnknownTypeException

isOK_Token

public boolean isOK_Token(org.apache.uima.cas.text.AnnotationFS token,
                          TokenNormalizer tokenNormalizer)

isOK_Token

public boolean isOK_Token(DictionaryToken token,
                          TokenNormalizer tokenNormalizer)


Copyright © 2011. All Rights Reserved.