diff --git a/src/main/java/com/graphaware/nlp/procedure/NLPProcedure.java b/src/main/java/com/graphaware/nlp/procedure/NLPProcedure.java index 2d89915..eccb0b5 100644 --- a/src/main/java/com/graphaware/nlp/procedure/NLPProcedure.java +++ b/src/main/java/com/graphaware/nlp/procedure/NLPProcedure.java @@ -96,7 +96,7 @@ public RawIterator apply(Context ctx, Object[] inp Map inputParams = (Map) input[0]; String text = (String) inputParams.get(PARAMETER_NAME_TEXT); boolean checkForLanguage = (Boolean) inputParams.getOrDefault(PARAMETER_NAME_LANGUAGE_CHECK, true); - LOG.warn("Text: " + text); + LOG.info("Text: " + text); if (text == null || (checkForLanguage && !LanguageManager.getInstance().isTextLanguageSupported(text))) { LOG.info("text is null or language not supported or unable to detect the language"); return Iterators.asRawIterator(Collections.emptyIterator()); diff --git a/src/main/java/com/graphaware/nlp/processor/PipelineBuilder.java b/src/main/java/com/graphaware/nlp/processor/PipelineBuilder.java index dc8eec7..349a777 100644 --- a/src/main/java/com/graphaware/nlp/processor/PipelineBuilder.java +++ b/src/main/java/com/graphaware/nlp/processor/PipelineBuilder.java @@ -9,6 +9,7 @@ import java.util.Properties; public class PipelineBuilder { + private static final String CUSTOM_STOP_WORD_LIST = "start,starts,period,periods,a,an,and,are,as,at,be,but,by,for,if,in,into,is,it,no,not,of,o,on,or,such,that,the,their,then,there,these,they,this,to,was,will,with"; private final Properties properties = new Properties(); @@ -57,6 +58,21 @@ public PipelineBuilder defaultStopWordAnnotator() { return this; } + public PipelineBuilder customStopWordAnnotator(String customStopWordList) { + checkForExistingAnnotators(); + String stopWordList; + if (annotattors.indexOf("stopword") >= 0) { + String alreadyexistingStopWordList = properties.getProperty(StopwordAnnotator.STOPWORDS_LIST); + stopWordList = alreadyexistingStopWordList + "," + customStopWordList; + } else { + annotattors.append("stopword"); + properties.setProperty("customAnnotatorClass.stopword", "com.graphaware.nlp.processor.StopwordAnnotator"); + stopWordList = customStopWordList; + } + properties.setProperty(StopwordAnnotator.STOPWORDS_LIST, stopWordList); + return this; + } + public PipelineBuilder stopWordAnnotator(Properties properties) { properties.entrySet().stream().forEach((entry) -> { this.properties.setProperty((String) entry.getKey(), (String) entry.getValue());