diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2845770..fc5f09b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@
### Changes
+- Add a spelling suggester ([#7](https://github.com/EvidentSolutions/raudikko/pull/7))
- Add enums for fields of Analysis where applicable. Breaking change if you were using the fields.
- Bump the minimum supported Java version to 17
- Update morphology to [d3f4a0](https://github.com/voikko/corevoikko/commit/d3f4a065aa89c322f9c2476ea4d777bc4ba9ac6f)
diff --git a/src/main/java/fi/evident/raudikko/Morphology.java b/src/main/java/fi/evident/raudikko/Morphology.java
index bc1198b..76a5110 100644
--- a/src/main/java/fi/evident/raudikko/Morphology.java
+++ b/src/main/java/fi/evident/raudikko/Morphology.java
@@ -35,6 +35,7 @@
import fi.evident.raudikko.internal.fst.UnweightedTransducer;
import fi.evident.raudikko.internal.fst.UnweightedVfstLoader;
import fi.evident.raudikko.internal.morphology.FinnishVfstAnalyzer;
+import fi.evident.raudikko.internal.suggestions.DefaultSpellingSuggester;
import org.jetbrains.annotations.NotNull;
import java.io.IOException;
@@ -85,4 +86,14 @@ private Morphology(@NotNull UnweightedTransducer transducer) {
public @NotNull Analyzer newAnalyzer(@NotNull AnalyzerConfiguration configuration) {
return new FinnishVfstAnalyzer(transducer, configuration);
}
+
+ /**
+ * Creates a new {@link SpellingSuggester} for this morphology.
+ *
+ * <p>The suggester is a mutable object that can be used repeatedly, but must not be
+ * shared between threads; every call returns a new, independent instance.
+ */
+ public @NotNull SpellingSuggester newSpellingSuggester() {
+ return new DefaultSpellingSuggester(this);
+ }
}
diff --git a/src/main/java/fi/evident/raudikko/SpellingSuggester.java b/src/main/java/fi/evident/raudikko/SpellingSuggester.java
new file mode 100644
index 0000000..78251a9
--- /dev/null
+++ b/src/main/java/fi/evident/raudikko/SpellingSuggester.java
@@ -0,0 +1,48 @@
+/*
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * https://www.mozilla.org/en-US/MPL/2.0/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Libvoikko: Library of natural language processing tools.
+ * The Initial Developer of the Original Code is Harri Pitkänen <hatapitk@iki.fi>.
+ * Portions created by the Initial Developer are Copyright (C) 2012
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Raudikko, the Java port of the Initial Code is Copyright (C) 2020 by
+ * Evident Solutions Oy. All Rights Reserved.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ */
+
+package fi.evident.raudikko;
+
+import org.jetbrains.annotations.NotNull;
+
+import java.util.List;
+
+/**
+ * Provides suggestions for misspelled words.
+ */
+public interface SpellingSuggester {
+
+    /**
+     * Given a word, provides a list of spelling suggestions for it; the list is never {@code null} but may be empty.
+     */
+    @NotNull List<String> provideSpellingSuggestions(@NotNull String word);
+}
diff --git a/src/main/java/fi/evident/raudikko/internal/suggestions/DefaultSpellingSuggester.java b/src/main/java/fi/evident/raudikko/internal/suggestions/DefaultSpellingSuggester.java
new file mode 100644
index 0000000..538e725
--- /dev/null
+++ b/src/main/java/fi/evident/raudikko/internal/suggestions/DefaultSpellingSuggester.java
@@ -0,0 +1,194 @@
+/*
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * https://www.mozilla.org/en-US/MPL/2.0/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Libvoikko: Library of natural language processing tools.
+ * The Initial Developer of the Original Code is Harri Pitkänen <hatapitk@iki.fi>.
+ * Portions created by the Initial Developer are Copyright (C) 2012
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Raudikko, the Java port of the Initial Code is Copyright (C) 2020 by
+ * Evident Solutions Oy. All Rights Reserved.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ */
+
+package fi.evident.raudikko.internal.suggestions;
+
+import fi.evident.raudikko.Analyzer;
+import fi.evident.raudikko.AnalyzerConfiguration;
+import fi.evident.raudikko.Morphology;
+import fi.evident.raudikko.SpellingSuggester;
+import fi.evident.raudikko.internal.suggestions.Suggestion.SimpleSuggestion;
+import fi.evident.raudikko.internal.suggestions.Suggestion.SplitSuggestion;
+import fi.evident.raudikko.internal.utils.StringUtils;
+import org.jetbrains.annotations.NotNull;
+import org.jetbrains.annotations.Nullable;
+
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.BiFunction;
+import java.util.function.Function;
+import java.util.function.UnaryOperator;
+import java.util.stream.Stream;
+
+import static fi.evident.raudikko.internal.suggestions.Replacements.*;
+import static fi.evident.raudikko.internal.utils.StringUtils.isAllUpper;
+import static java.lang.Character.isUpperCase;
+import static java.util.Collections.emptyList;
+import static java.util.Comparator.comparing;
+
+public final class DefaultSpellingSuggester implements SpellingSuggester {
+
+    private final @NotNull SpellChecker spellChecker;
+
+    /**
+     * How many suggestions are returned to the user at most
+     */
+    private static final int MAX_SUGGESTIONS_RETURNED = 5;
+
+    /**
+     * How many distinct variations are spell-checked at most for one list of generators
+     */
+    private static final int MAX_VARIATIONS = 800;
+
+    /**
+     * Generate more suggestions than required so that sorting gets to pick the best ones
+     */
+    private static final int MAX_SUGGESTIONS_GENERATED = 3 * MAX_SUGGESTIONS_RETURNED;
+
+    private static final int MAX_WORD_SIZE = 255; // longer words get no suggestions at all
+    private static final @NotNull String COMMON_LETTERS = "aitesn";
+    private static final @NotNull String UNCOMMON_LETTERS = "ulkoämrvpyhjdögfbcw:xzqå'.";
+
+    private static final @NotNull List<Function<String, Stream<Suggestion>>> primaryGenerators = List.of(
+        simple(Stream::of),
+        simple(SuggestionGenerators::removeSoftHyphens)
+    );
+
+    private static final @NotNull List<Function<String, Stream<Suggestion>>> secondaryGenerators = List.of(
+        simple(SuggestionGenerators::vowelChange),
+        simple(SuggestionGenerators::replace, REPLACEMENTS_1_FULL),
+        simple(SuggestionGenerators::delete),
+        simple(SuggestionGenerators::insertHyphen),
+        simple(SuggestionGenerators::duplicateCharacters),
+        SuggestionGenerators::splitWord,
+        simple(SuggestionGenerators::replaceTwo, REPLACEMENTS_1),
+        simple(SuggestionGenerators::replace, REPLACEMENTS_2_FULL),
+        simple(SuggestionGenerators::insertion, COMMON_LETTERS),
+        simple(SuggestionGenerators::swap),
+        simple(SuggestionGenerators::replace, REPLACEMENTS_3_FULL),
+        simple(SuggestionGenerators::insertion, UNCOMMON_LETTERS),
+        simple(SuggestionGenerators::replace, REPLACEMENTS_4_FULL),
+        simple(SuggestionGenerators::replaceTwo, REPLACEMENTS_2),
+        simple(SuggestionGenerators::replaceTwo, REPLACEMENTS_3),
+        simple(SuggestionGenerators::replaceTwo, REPLACEMENTS_4),
+        simple(SuggestionGenerators::deleteTwo),
+        simple(SuggestionGenerators::replace, REPLACEMENTS_5_FULL)
+    );
+
+    public DefaultSpellingSuggester(@NotNull Morphology morphology) {
+        this.spellChecker = new SpellChecker(newAnalyzer(morphology));
+    }
+
+    @Override
+    public @NotNull List<String> provideSpellingSuggestions(@NotNull String word) {
+        if (word.length() <= 1 || word.length() > MAX_WORD_SIZE)
+            return emptyList();
+
+        var capitalizer = capitalizer(word); // re-applies the capitalization style of the input to every suggestion
+
+        var results1 = generateSuggestions(word, primaryGenerators);
+        var results2 = generateSuggestions(word, secondaryGenerators);
+
+        return Stream.concat(results1, results2) // primary results always come before secondary ones
+            .map(capitalizer.compose(WordWithPriority::word))
+            .distinct()
+            .limit(MAX_SUGGESTIONS_RETURNED)
+            .toList();
+    }
+
+    private @NotNull Stream<WordWithPriority> generateSuggestions(
+        @NotNull String word,
+        @NotNull List<Function<String, Stream<Suggestion>>> generators
+    ) {
+        AtomicInteger count = new AtomicInteger(0); // atomicity not really needed, just box for counter
+
+        return generators.stream()
+            .flatMap(g -> g.apply(word))
+            .distinct()
+            .limit(MAX_VARIATIONS)
+            .flatMap(s -> Stream.ofNullable(processSuggestion(s, spellChecker))) // drops candidates the spell-checker rejects
+            .limit(MAX_SUGGESTIONS_GENERATED)
+            .map(s -> new WordWithPriority(s.word(), s.priority() * (count.getAndIncrement() + 5))) // later hits get a larger multiplier
+            .sorted(comparing(WordWithPriority::priority)); // smallest priority value first
+    }
+
+    private static @Nullable WordWithPriority processSuggestion(@NotNull Suggestion suggestion, @NotNull SpellChecker spellChecker) {
+        if (suggestion instanceof SimpleSuggestion s) {
+            return spellChecker.spellCheck(s.word());
+
+        } else if (suggestion instanceof SplitSuggestion s) {
+            var s1 = spellChecker.spellCheck(s.word1()); // both halves must be accepted for the split to count
+            if (s1 == null)
+                return null;
+
+            var s2 = spellChecker.spellCheck(s.word2());
+            if (s2 == null)
+                return null;
+
+            return new WordWithPriority(s1.word() + " " + s2.word(), (s1.priority() + s2.priority()) * s.priorityMultiplier());
+        } else {
+            throw new IllegalStateException("unexpected suggestion: " + suggestion);
+        }
+    }
+
+    private static @NotNull UnaryOperator<String> capitalizer(@NotNull String word) {
+        if (isAllUpper(word))
+            return s -> s.toUpperCase(java.util.Locale.ROOT); // locale-independent: the default locale (e.g. Turkish) must not alter the letters
+        else if (isUpperCase(word.charAt(0)))
+            return StringUtils::capitalizeIfLower;
+        else
+            return UnaryOperator.identity();
+    }
+
+    private static @NotNull Analyzer newAnalyzer(@NotNull Morphology morphology) {
+        var config = new AnalyzerConfiguration();
+
+        config.setIncludeWord(true);
+        config.setIncludeStructure(true);
+        config.setIncludeBasicAttributes(true);
+        config.setIncludeOrganizationNameAnalysis(true);
+
+        config.setIncludeBaseForm(false);
+        config.setIncludeBaseFormParts(false);
+        config.setIncludeFstOutput(false);
+
+        return morphology.newAnalyzer(config);
+    }
+
+    private static @NotNull Function<String, Stream<Suggestion>> simple(@NotNull Function<String, Stream<String>> f) {
+        return w -> f.apply(w).map(SimpleSuggestion::new);
+    }
+
+    private static <T> @NotNull Function<String, Stream<Suggestion>> simple(@NotNull BiFunction<String, T, Stream<String>> f, T param) {
+        return w -> f.apply(w, param).map(SimpleSuggestion::new);
+    }
+}
diff --git a/src/main/java/fi/evident/raudikko/internal/suggestions/Replacements.java b/src/main/java/fi/evident/raudikko/internal/suggestions/Replacements.java
new file mode 100644
index 0000000..e04329f
--- /dev/null
+++ b/src/main/java/fi/evident/raudikko/internal/suggestions/Replacements.java
@@ -0,0 +1,97 @@
+/*
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * https://www.mozilla.org/en-US/MPL/2.0/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Libvoikko: Library of natural language processing tools.
+ * The Initial Developer of the Original Code is Harri Pitkänen <hatapitk@iki.fi>.
+ * Portions created by the Initial Developer are Copyright (C) 2012
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Raudikko, the Java port of the Initial Code is Copyright (C) 2020 by
+ * Evident Solutions Oy. All Rights Reserved.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ */
+
+package fi.evident.raudikko.internal.suggestions;
+
+import fi.evident.raudikko.internal.utils.CharMap;
+import fi.evident.raudikko.internal.utils.CollectionUtils;
+import org.jetbrains.annotations.NotNull;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import static java.lang.Character.isLowerCase;
+import static java.lang.Character.toUpperCase;
+
+final class Replacements {
+
+    static final @NotNull Replacements REPLACEMENTS_1 = parse(".,asiuiotrtdersšsanmuilkklkgoiäömnrertvbpbpoythjjhjkdtdsdföägfghgkfgfdbpbncvcswewvxczžzxqaåoåpåäåöaeiktyea");
+    static final @NotNull Replacements REPLACEMENTS_2 = parse("1q2q2w3w3e4e4r5r5t6t6y7y7u8u8i9i9o0o0p+pie");
+    static final @NotNull Replacements REPLACEMENTS_3 = parse("essdnhujlökjopäpmkrdvgplyhhujideölgtfvbvckwaxszaqkåaaåeéaâkcscijxz");
+    static final @NotNull Replacements REPLACEMENTS_4 = parse("qwqswqwswdedefrfrgtftgthygyjuhukilokolpöpäsesxdrbgfefrftfcgygbgvhyhnhbhgjujmjnkikokmlolpöpöåäåzsxdcdcfcxvfbhnjnbmjewpåaqswszdwdcdxvcawazsq");
+    static final @NotNull Replacements REPLACEMENTS_5 = parse("aooaoutlsraieääeuvvuoddokqpvvpqeeqaddarsetteryyrtuutyiiyuoippioåhvvhhmmh");
+    static final @NotNull Replacements REPLACEMENTS_1_FULL = REPLACEMENTS_1.extendWithMatchingUpperCaseReplacements();
+    static final @NotNull Replacements REPLACEMENTS_2_FULL = REPLACEMENTS_2.extendWithMatchingUpperCaseReplacements();
+    static final @NotNull Replacements REPLACEMENTS_3_FULL = REPLACEMENTS_3.extendWithMatchingUpperCaseReplacements();
+    static final @NotNull Replacements REPLACEMENTS_4_FULL = REPLACEMENTS_4.extendWithMatchingUpperCaseReplacements();
+    static final @NotNull Replacements REPLACEMENTS_5_FULL = REPLACEMENTS_5.extendWithMatchingUpperCaseReplacements();
+
+    private final @NotNull CharMap<char[]> replacementMapping; // source character -> its replacement characters, in definition order
+    private static final char @NotNull[] EMPTY_MAPPING = new char[0];
+
+    private Replacements(@NotNull CharMap<char[]> replacementMapping) {
+        this.replacementMapping = replacementMapping;
+    }
+
+    public char @NotNull [] forCharacter(char from) { // returns an empty array when no replacement is defined for the character
+        return replacementMapping.getOrDefault(from, EMPTY_MAPPING);
+    }
+
+    private @NotNull Replacements extendWithMatchingUpperCaseReplacements() {
+        var newMapping = replacementMapping.copy();
+        for (char ch : replacementMapping.keys()) { // walk the unmodified source map; newMapping receives entries inside the loop
+            var values = replacementMapping.get(ch);
+            if (values != null && isLowerCase(ch))
+                newMapping.put(toUpperCase(ch), CollectionUtils.toUpperCase(values));
+        }
+
+        return new Replacements(newMapping);
+    }
+
+    private static @NotNull Replacements parse(@NotNull String input) { // input is a sequence of (from, to) character pairs
+        if (input.length() % 2 != 0) throw new IllegalArgumentException("invalid replacement string " + input);
+
+        var mapping = new HashMap<Character, List<Character>>();
+        for (int i = 0; i < input.length(); i += 2) {
+            char from = input.charAt(i);
+            char to = input.charAt(i + 1);
+
+            var targets = mapping.computeIfAbsent(from, k -> new ArrayList<>());
+            targets.add(to);
+        }
+
+        var result = new CharMap<char[]>();
+        mapping.forEach((key, value) -> result.put(key, CollectionUtils.toCharArray(value)));
+        return new Replacements(result);
+    }
+}
diff --git a/src/main/java/fi/evident/raudikko/internal/suggestions/SpellChecker.java b/src/main/java/fi/evident/raudikko/internal/suggestions/SpellChecker.java
new file mode 100644
index 0000000..256067d
--- /dev/null
+++ b/src/main/java/fi/evident/raudikko/internal/suggestions/SpellChecker.java
@@ -0,0 +1,138 @@
+/*
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * https://www.mozilla.org/en-US/MPL/2.0/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Libvoikko: Library of natural language processing tools.
+ * The Initial Developer of the Original Code is Harri Pitkänen <hatapitk@iki.fi>.
+ * Portions created by the Initial Developer are Copyright (C) 2012
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Raudikko, the Java port of the Initial Code is Copyright (C) 2020 by
+ * Evident Solutions Oy. All Rights Reserved.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ */
+
+package fi.evident.raudikko.internal.suggestions;
+
+import fi.evident.raudikko.Analysis;
+import fi.evident.raudikko.Analyzer;
+import fi.evident.raudikko.analysis.Structure;
+import org.jetbrains.annotations.NotNull;
+import org.jetbrains.annotations.Nullable;
+
+import static java.lang.Math.min;
+
+/**
+ * A spell-checker that uses Analyzer to handle the spell-checking; accepted words get a priority where smaller is better.
+ */
+final class SpellChecker {
+
+    private final @NotNull Analyzer analyzer;
+
+    public SpellChecker(@NotNull Analyzer analyzer) {
+        this.analyzer = analyzer;
+    }
+
+    public @Nullable WordWithPriority spellCheck(@NotNull String word) { // null means no analysis accepted the word
+        var analyses = analyzer.analyze(word);
+        WordWithPriority best = null;
+
+        for (var analysis : analyses) {
+            var result = createResult(word, analysis);
+            if (result != null && (best == null || result.priority() < best.priority())) // keep the lowest priority value
+                best = result;
+        }
+
+        return best;
+    }
+
+    private static @Nullable WordWithPriority createResult(@NotNull String word, @NotNull Analysis analysis) {
+        var structure = analysis.getStructure();
+        if (structure == null) return null;
+
+        var cr = CapitalizationResult.resolve(word, structure);
+        var priority = priorityFromWordClassAndInflection(analysis) * priorityFromStructure(structure) * cr.priority();
+
+        return new WordWithPriority(cr == CapitalizationResult.OK ? word : structure.apply(word), priority);
+    }
+
+    private static int priorityFromStructure(@NotNull Structure structure) {
+        return 1 << (3 * (Math.max(1, min(structure.getMorphemeCount(), 5)) - 1)); // clamp to 1..5: a count of 0 would give shift -3, which Java masks to 29
+    }
+
+    private static int priorityFromWordClassAndInflection(@NotNull Analysis analysis) {
+        var wordClass = analysis.getWordClass();
+        if (wordClass == null)
+            return 4;
+
+        return switch (wordClass) {
+            case NOUN, ADJECTIVE, NOUN_ADJECTIVE, PRONOUN, FIRST_NAME, LAST_NAME, TOPONYM, PROPER_NOUN ->
+                priorityFromNounInflection(analysis);
+            default -> 4; // other word classes have no special handling yet
+        };
+    }
+
+    private static int priorityFromNounInflection(@NotNull Analysis analysis) {
+        var locative = analysis.getLocative();
+        if (locative == null)
+            return 4;
+
+        return switch (locative) {
+            case NOMINATIVE -> 2;
+            case GENITIVE -> 3;
+            case PARTITIVE -> 5;
+            case INESIVE, ILLATIVE -> 8;
+            case ELATIVE, ADESSIVE -> 12;
+            case ALLATIVE, ESSIVE, TRANSLATIVE, INSTRUCTIVE -> 20;
+            case ABLATIVE -> 30;
+            case ABESSIVE, COMITATIVE -> 60;
+            default -> 4;
+        };
+    }
+
+    private enum CapitalizationResult {
+        OK, FIRST_CAPITALIZED, CAPITALIZATION_ERROR; // declaration order matters: priority() is derived from it
+
+        int priority() {
+            return ordinal() + 1;
+        }
+
+        static @NotNull CapitalizationResult resolve(@NotNull String word, @NotNull Structure structure) {
+            var result = OK;
+            var it = structure.nonMorphemes();
+
+            for (int i = 0; i < word.length(); i++) {
+                if (!it.hasNext())
+                    break;
+
+                var expected = it.next();
+                if (!expected.agrees(word.charAt(i))) {
+                    if (i == 0 && expected.isUpperCase()) // only a missing initial capital is tolerated
+                        result = CapitalizationResult.FIRST_CAPITALIZED;
+                    else
+                        return CapitalizationResult.CAPITALIZATION_ERROR;
+                }
+            }
+
+            return result;
+        }
+    }
+}
diff --git a/src/main/java/fi/evident/raudikko/internal/suggestions/Suggestion.java b/src/main/java/fi/evident/raudikko/internal/suggestions/Suggestion.java
new file mode 100644
index 0000000..af950d1
--- /dev/null
+++ b/src/main/java/fi/evident/raudikko/internal/suggestions/Suggestion.java
@@ -0,0 +1,44 @@
+/*
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * https://www.mozilla.org/en-US/MPL/2.0/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Libvoikko: Library of natural language processing tools.
+ * The Initial Developer of the Original Code is Harri Pitkänen <hatapitk@iki.fi>.
+ * Portions created by the Initial Developer are Copyright (C) 2012
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Raudikko, the Java port of the Initial Code is Copyright (C) 2020 by
+ * Evident Solutions Oy. All Rights Reserved.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ */
+
+package fi.evident.raudikko.internal.suggestions;
+
+import org.jetbrains.annotations.NotNull;
+
+public sealed interface Suggestion permits Suggestion.SimpleSuggestion, Suggestion.SplitSuggestion { // a candidate correction: one word, or a split into two words
+
+ record SimpleSuggestion(@NotNull String word) implements Suggestion { // a single candidate word
+ }
+
+ record SplitSuggestion(@NotNull String word1, @NotNull String word2, int priorityMultiplier) implements Suggestion { // two candidate words plus a factor applied to their summed priority
+ }
+}
diff --git a/src/main/java/fi/evident/raudikko/internal/suggestions/SuggestionGenerators.java b/src/main/java/fi/evident/raudikko/internal/suggestions/SuggestionGenerators.java
new file mode 100644
index 0000000..177b078
--- /dev/null
+++ b/src/main/java/fi/evident/raudikko/internal/suggestions/SuggestionGenerators.java
@@ -0,0 +1,220 @@
+/*
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * https://www.mozilla.org/en-US/MPL/2.0/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Libvoikko: Library of natural language processing tools.
+ * The Initial Developer of the Original Code is Harri Pitkänen <hatapitk@iki.fi>.
+ * Portions created by the Initial Developer are Copyright (C) 2012
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Raudikko, the Java port of the Initial Code is Copyright (C) 2020 by
+ * Evident Solutions Oy. All Rights Reserved.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ */
+
+package fi.evident.raudikko.internal.suggestions;
+
+import fi.evident.raudikko.internal.suggestions.Suggestion.SplitSuggestion;
+import fi.evident.raudikko.internal.utils.StringUtils;
+import org.jetbrains.annotations.NotNull;
+import org.jetbrains.annotations.Nullable;
+
+import java.util.ArrayList;
+import java.util.stream.IntStream;
+import java.util.stream.Stream;
+
+import static fi.evident.raudikko.internal.utils.CharUtils.*;
+import static fi.evident.raudikko.internal.utils.StringUtils.*;
+import static java.lang.Math.min;
+
+final class SuggestionGenerators {
+
+    /**
+     * Generates suggestions by deleting each character from the input word.
+     * A character is not deleted if it is the same (ignoring case) as the previous character.
+     */
+    static @NotNull Stream<String> delete(@NotNull String word) {
+        return IntStream.range(0, word.length())
+            .filter(i -> i == 0 || !equalsIgnoreCase(word.charAt(i), word.charAt(i - 1)))
+            .mapToObj(i -> removeRange(word, i, i + 1));
+    }
+
+    /**
+     * Generates suggestions by removing one of two identical adjacent character pairs; words shorter than 6 yield nothing.
+     */
+    static @NotNull Stream<String> deleteTwo(@NotNull String word) {
+        if (word.length() < 6)
+            return Stream.empty();
+
+        return IntStream.range(0, word.length() - 3)
+            .filter(i -> word.regionMatches(i, word, i + 2, 2))
+            .mapToObj(i -> removeRange(word, i, i + 2));
+    }
+
+    /**
+     * Generates a variant without soft hyphens, or nothing if the word contains none.
+     */
+    static @NotNull Stream<String> removeSoftHyphens(@NotNull String word) {
+        var withoutHyphen = word.replace("\u00AD", "");
+
+        return withoutHyphen.equals(word) ? Stream.empty() : Stream.of(withoutHyphen);
+    }
+
+    /**
+     * Generates suggestions by trying to apply each of the given replacements to each of the possible characters.
+     */
+    static @NotNull Stream<String> replace(@NotNull String word, @NotNull Replacements replacements) {
+        var result = Stream.<String>builder();
+
+        for (var i = 0; i < word.length(); i++)
+            for (var to : replacements.forCharacter(word.charAt(i)))
+                result.add(replaceCharAt(word, i, to));
+
+        return result.build();
+    }
+
+    /**
+     * Generates suggestions where a pair of identical adjacent characters is replaced by a pair of
+     * other characters using the given replacement mappings. The word is lower-cased first.
+     */
+    static @NotNull Stream<String> replaceTwo(@NotNull String word, @NotNull Replacements replacements) {
+        var s = word.toLowerCase(java.util.Locale.ROOT); // locale-independent so the result matches the lower-case replacement tables
+        var result = Stream.<String>builder();
+
+        for (int i = 1; i < s.length(); i++) {
+            var ch = s.charAt(i);
+            if (ch == s.charAt(i - 1)) {
+                for (char to : replacements.forCharacter(ch))
+                    result.add(replaceTwoChars(s, i - 1, to));
+                i++; // skip past the pair so that its second character does not start another pair
+            }
+        }
+        return result.build();
+    }
+
+    /**
+     * Generates suggestions by inserting given characters into the string.
+     * Will not insert a character next to an existing instance of it.
+     */
+    static @NotNull Stream<String> insertion(@NotNull String word, @NotNull String insertedChars) {
+        return insertedChars.chars().mapToObj(c -> insertions(word, (char) c)).flatMap(s -> s);
+    }
+
+    private static @NotNull Stream<String> insertions(@NotNull String word, char insertionChar) {
+        return IntStream.rangeClosed(0, word.length())
+            .filter(i -> !containsAdjacentCharacterIgnoringCase(word, i, insertionChar))
+            .mapToObj(i -> word.substring(0, i) + insertionChar + word.substring(i));
+    }
+
+    /**
+     * Generates suggestions by inserting a hyphen into various places.
+     * Hyphen is never inserted near an existing hyphen or near beginning or end.
+     */
+    @NotNull
+    static Stream<String> insertHyphen(@NotNull String word) {
+        return IntStream.range(2, word.length() - 1)
+            .filter(i -> !containsInSubstring(word, i - 2, i + 2, '-'))
+            .mapToObj(i -> word.substring(0, i) + '-' + word.substring(i));
+    }
+
+    /**
+     * Generates suggestions by duplicating existing characters in a word.
+     */
+    @NotNull
+    static Stream<String> duplicateCharacters(@NotNull String word) {
+        return IntStream.range(0, word.length())
+            .filter(i -> {
+                char c = word.charAt(i);
+                return (i == 0 || word.charAt(i - 1) != c) // not already doubled on either side
+                    && (i + 1 >= word.length() || word.charAt(i + 1) != c)
+                    && c != '-' && c != '\'';
+            })
+            .mapToObj(i -> word.substring(0, i) + word.charAt(i) + word.substring(i));
+    }
+
+    /**
+     * Generates suggestions by swapping characters with nearby characters.
+     */
+    static @NotNull Stream<String> swap(@NotNull String word) {
+        var maxDistance = (word.length() <= 8) ? word.length() : (50 / word.length()); // becomes 0 for words longer than 50
+        if (maxDistance == 0)
+            return Stream.empty();
+
+        return IntStream.range(0, word.length())
+            .mapToObj(i -> swapOne(word, maxDistance, i))
+            .flatMap(s -> s);
+    }
+
+    private static @NotNull Stream<String> swapOne(@NotNull String word, int maxDistance, int i) {
+        return IntStream.range(i + 1, min(i + maxDistance + 1, word.length()))
+            .filter(j -> !equalsIgnoreCase(word.charAt(i), word.charAt(j)) && !isFrontOrBackVowel(word.charAt(i)))
+            .mapToObj(j -> StringUtils.swap(word, i, j));
+    }
+
+    /**
+     * Generates suggestions by converting front-vowels to back-vowels and vice versa.
+     */
+    static @NotNull Stream<String> vowelChange(@NotNull String word) {
+        var frontOrBackVowelIndices = new ArrayList<Integer>(word.length());
+        for (int i = 0, len = word.length(); i < len; i++)
+            if (isFrontOrBackVowel(word.charAt(i)))
+                frontOrBackVowelIndices.add(i);
+
+        if (frontOrBackVowelIndices.size() == 0 || frontOrBackVowelIndices.size() > 7)
+            return Stream.empty();
+
+        return IntStream.range(1, 1 << frontOrBackVowelIndices.size()).mapToObj(mask -> { // one variant per non-empty subset of the vowels
+            var chars = word.toCharArray();
+
+            for (int j = 0; j < frontOrBackVowelIndices.size(); j++) {
+                int i = frontOrBackVowelIndices.get(j);
+
+                if ((mask & (1 << j)) != 0)
+                    chars[i] = convertVowelBetweenFrontAndBack(chars[i]);
+            }
+
+            return new String(chars);
+        });
+    }
+
+    /**
+     * Returns suggestions by trying to split the lower-cased word at various points, rightmost split point first.
+     */
+    static @NotNull Stream<Suggestion> splitWord(@NotNull String word) {
+        var lower = word.toLowerCase(java.util.Locale.ROOT); // locale-independent lower-casing
+        return IntStream.range(2, lower.length() - 2)
+            .mapToObj(i -> splitWordAt(lower, lower.length() - i))
+            .flatMap(Stream::ofNullable);
+    }
+
+    private static @Nullable SplitSuggestion splitWordAt(@NotNull String word, int i) {
+        // Don't split if there's a nearby hyphen
+        if (word.charAt(i - 2) == '-' || word.charAt(i - 1) == '-' || word.charAt(i + 1) == '-')
+            return null;
+
+        var hyphen = word.charAt(i) == '-';
+        var word1 = word.substring(0, i);
+        var word2 = word.substring(i + (hyphen ? 1 : 0)); // a hyphen at the split point is dropped from the second word
+        var priorityMultiplier = hyphen ? 6 : 1;
+
+        return new SplitSuggestion(word1, word2, priorityMultiplier);
+    }
+}
diff --git a/src/main/java/fi/evident/raudikko/internal/suggestions/WordWithPriority.java b/src/main/java/fi/evident/raudikko/internal/suggestions/WordWithPriority.java
new file mode 100644
index 0000000..b67e3bb
--- /dev/null
+++ b/src/main/java/fi/evident/raudikko/internal/suggestions/WordWithPriority.java
@@ -0,0 +1,38 @@
+/*
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * https://www.mozilla.org/en-US/MPL/2.0/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Libvoikko: Library of natural language processing tools.
+ * The Initial Developer of the Original Code is Harri Pitkänen .
+ * Portions created by the Initial Developer are Copyright (C) 2012
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Raudikko, the Java port of the Initial Code is Copyright (C) 2020 by
+ * Evident Solutions Oy. All Rights Reserved.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ */
+
+package fi.evident.raudikko.internal.suggestions;
+
+import org.jetbrains.annotations.NotNull;
+
+/**
+ * A word paired with an integer priority.
+ *
+ * @param word the word
+ * @param priority the priority attached to the word
+ */
+record WordWithPriority(@NotNull String word, int priority) {
+}
diff --git a/src/main/java/fi/evident/raudikko/internal/utils/CharMap.java b/src/main/java/fi/evident/raudikko/internal/utils/CharMap.java
index e83407c..0838436 100644
--- a/src/main/java/fi/evident/raudikko/internal/utils/CharMap.java
+++ b/src/main/java/fi/evident/raudikko/internal/utils/CharMap.java
@@ -32,11 +32,16 @@
package fi.evident.raudikko.internal.utils;
+import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
+import java.util.ArrayList;
+import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
+import static java.util.Collections.unmodifiableCollection;
+
public final class CharMap {
private final Object[] low = new Object[256];
@@ -62,4 +67,28 @@ else if (high != null)
else
return null;
}
+
+    /**
+     * Returns the value that {@code get} finds for {@code key}, or {@code defaultValue}
+     * when {@code get} returns {@code null}.
+     */
+    public @NotNull T getOrDefault(char key, @NotNull T defaultValue) {
+        var value = get(key);
+        if (value == null)
+            return defaultValue;
+
+        return value;
+    }
+
+    /**
+     * Returns the keys that currently have a non-null entry.
+     *
+     * <p>Keys held in the {@code low} array come first, in ascending order, followed by
+     * the keys of the {@code high} map (if any) in that map's iteration order. The
+     * result is a freshly built, unmodifiable collection.
+     */
+    public @NotNull Collection<Character> keys() {
+        var result = new ArrayList<Character>();
+        for (int i = 0; i < low.length; i++)
+            if (low[i] != null)
+                result.add((char) i);
+
+        if (high != null)
+            result.addAll(high.keySet());
+
+        return unmodifiableCollection(result);
+    }
+
+    /**
+     * Returns a shallow copy of this map: the copy has its own {@code low} array and
+     * {@code high} map, but the stored values themselves are shared with this map.
+     */
+    public @NotNull CharMap<T> copy() {
+        CharMap<T> result = new CharMap<>();
+        System.arraycopy(low, 0, result.low, 0, low.length);
+        result.high = high != null ? new HashMap<>(high) : null;
+        return result;
+    }
}
diff --git a/src/main/java/fi/evident/raudikko/internal/utils/CharUtils.java b/src/main/java/fi/evident/raudikko/internal/utils/CharUtils.java
new file mode 100644
index 0000000..26168d0
--- /dev/null
+++ b/src/main/java/fi/evident/raudikko/internal/utils/CharUtils.java
@@ -0,0 +1,69 @@
+/*
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * https://www.mozilla.org/en-US/MPL/2.0/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Libvoikko: Library of natural language processing tools.
+ * The Initial Developer of the Original Code is Harri Pitkänen .
+ * Portions created by the Initial Developer are Copyright (C) 2012
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Raudikko, the Java port of the Initial Code is Copyright (C) 2020 by
+ * Evident Solutions Oy. All Rights Reserved.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ */
+
+package fi.evident.raudikko.internal.utils;
+
+import org.jetbrains.annotations.NotNull;
+
+import static fi.evident.raudikko.internal.utils.StringUtils.contains;
+import static java.lang.Character.toLowerCase;
+
+/**
+ * Static helpers for single characters: front/back vowel handling and
+ * case-insensitive comparison.
+ */
+public final class CharUtils {
+
+    /** The vowels that have a front/back counterpart, in lower and upper case. */
+    private static final @NotNull String FRONT_AND_BACK_VOWELS = "aouAOUäöyÄÖY";
+
+    private CharUtils() {
+    }
+
+    /**
+     * Maps a back vowel (a, o, u) to its front counterpart (ä, ö, y) and vice versa,
+     * preserving case. Any other character is returned unchanged.
+     */
+    public static char convertVowelBetweenFrontAndBack(char c) {
+        return switch (c) {
+            case 'a' -> 'ä';
+            case 'o' -> 'ö';
+            case 'u' -> 'y';
+            case 'A' -> 'Ä';
+            case 'O' -> 'Ö';
+            case 'U' -> 'Y';
+            case 'ä' -> 'a';
+            case 'ö' -> 'o';
+            case 'y' -> 'u';
+            case 'Ä' -> 'A';
+            case 'Ö' -> 'O';
+            case 'Y' -> 'U';
+            default -> c;
+        };
+    }
+
+    /**
+     * Returns whether {@code ch} is one of the vowels that
+     * {@link #convertVowelBetweenFrontAndBack(char)} converts.
+     */
+    public static boolean isFrontOrBackVowel(char ch) {
+        return contains(FRONT_AND_BACK_VOWELS, ch);
+    }
+
+    /**
+     * Returns whether the two characters are equal, or become equal once both are
+     * converted with {@link Character#toLowerCase(char)}.
+     */
+    public static boolean equalsIgnoreCase(char c1, char c2) {
+        return c1 == c2 || toLowerCase(c1) == toLowerCase(c2);
+    }
+}
diff --git a/src/main/java/fi/evident/raudikko/internal/utils/CollectionUtils.java b/src/main/java/fi/evident/raudikko/internal/utils/CollectionUtils.java
index 1da3589..136fd6e 100644
--- a/src/main/java/fi/evident/raudikko/internal/utils/CollectionUtils.java
+++ b/src/main/java/fi/evident/raudikko/internal/utils/CollectionUtils.java
@@ -46,4 +46,18 @@ public static int count(@NotNull List xs, @NotNull T x) {
return count;
}
+
+    /**
+     * Copies the characters of {@code xs} into a new primitive array, keeping their order.
+     *
+     * @param xs the characters to copy; must not contain {@code null} elements, because
+     *           each element is unboxed
+     * @return a new array with the same length as {@code xs}
+     */
+    public static char[] toCharArray(@NotNull List<Character> xs) {
+        char[] ys = new char[xs.size()];
+        for (int i = 0; i < xs.size(); i++)
+            ys[i] = xs.get(i);
+        return ys;
+    }
+
+    /**
+     * Returns a new array holding each character of {@code xs} converted with
+     * {@link Character#toUpperCase(char)}. The input array is left untouched.
+     */
+    public static char[] toUpperCase(char @NotNull [] xs) {
+        char[] upper = xs.clone();
+        for (int i = 0; i < upper.length; i++)
+            upper[i] = Character.toUpperCase(upper[i]);
+
+        return upper;
+    }
}
diff --git a/src/main/java/fi/evident/raudikko/internal/utils/StringUtils.java b/src/main/java/fi/evident/raudikko/internal/utils/StringUtils.java
index c1b8383..c181b69 100644
--- a/src/main/java/fi/evident/raudikko/internal/utils/StringUtils.java
+++ b/src/main/java/fi/evident/raudikko/internal/utils/StringUtils.java
@@ -34,20 +34,16 @@
import org.jetbrains.annotations.NotNull;
-import static java.lang.Character.toUpperCase;
+import java.util.stream.IntStream;
+
+import static fi.evident.raudikko.internal.utils.CharUtils.equalsIgnoreCase;
+import static java.lang.Character.*;
public final class StringUtils {
private StringUtils() {
}
- public static @NotNull String replaceCharAt(@NotNull String s, int i, char c) {
- if (s.charAt(i) == c) return s;
- var chars = s.toCharArray();
- chars[i] = c;
- return new String(chars);
- }
-
public static @NotNull String withoutChar(@NotNull CharSequence s, char removed) {
var sb = new StringBuilder(s.length());
for (int i = 0, len = s.length(); i < len; i++) {
@@ -89,19 +85,32 @@ public static boolean isAllLower(@NotNull CharSequence s) {
return s.chars().noneMatch(Character::isUpperCase);
}
+    /**
+     * Returns the indices at which {@code ch} occurs in {@code word}, in ascending order.
+     */
+    public static @NotNull IntStream charIndices(@NotNull String word, char ch) {
+        return IntStream.range(0, word.length())
+            .filter(index -> word.charAt(index) == ch);
+    }
+
+ /**
+ * Returns {@code s} with its first character converted to upper case. An empty string,
+ * or one whose first character is already upper case, is returned as is.
+ */
public static @NotNull String capitalize(@NotNull String s) {
- if (s.isEmpty()) return s;
+ if (s.isEmpty() || isUpperCase(s.charAt(0))) return s;
return toUpperCase(s.charAt(0)) + s.substring(1);
}
+    /**
+     * Returns {@code s} with its first character converted to lower case. An empty
+     * string, or one whose first character is already lower case, is returned as is.
+     */
+    public static @NotNull String decapitalize(@NotNull String s) {
+        if (s.isEmpty())
+            return s;
+
+        char first = s.charAt(0);
+        return isLowerCase(first) ? s : toLowerCase(first) + s.substring(1);
+    }
+
+ /**
+ * Returns {@code s} without the characters from {@code startIndex} (inclusive) to
+ * {@code endIndex} (exclusive). When the two indices are equal, {@code s} itself is returned.
+ *
+ * @throws IndexOutOfBoundsException if {@code endIndex} is less than {@code startIndex}
+ */
public static @NotNull String removeRange(@NotNull String s, int startIndex, int endIndex) {
if (endIndex < startIndex)
throw new IndexOutOfBoundsException();
- else if (endIndex == startIndex)
- return s;
- else
- return s.substring(0, startIndex) + s.substring(endIndex);
+
+ return endIndex == startIndex ? s : s.substring(0, startIndex) + s.substring(endIndex);
}
public static boolean contains(@NotNull CharSequence s, char c) {
@@ -113,7 +122,7 @@ public static int indexOf(@NotNull CharSequence s, char c) {
}
public static int indexOf(@NotNull CharSequence s, char c, int fromIndex) {
- for (int i = fromIndex, n = s.length(); i < n; i++)
+ for (int i = fromIndex, len = s.length(); i < len; i++)
if (s.charAt(i) == c)
return i;
return -1;
@@ -123,13 +132,42 @@ public static boolean matchesAt(@NotNull CharSequence haystack, int offset, @Not
if (offset < 0 || offset + needle.length() > haystack.length())
return false;
- for (int i = 0; i < needle.length(); i++)
+ for (int i = 0, len = needle.length(); i < len; i++)
if (haystack.charAt(i + offset) != needle.charAt(i))
return false;
return true;
}
+    /**
+     * Returns a copy of {@code s} in which the characters at indices {@code i} and
+     * {@code j} have traded places.
+     */
+    public static @NotNull String swap(@NotNull String s, int i, int j) {
+        char atJ = s.charAt(j);
+        char[] swapped = s.toCharArray();
+        swapped[i] = atJ;
+        swapped[j] = s.charAt(i);
+        return String.valueOf(swapped);
+    }
+
+    /**
+     * Returns {@code s} with the character at index {@code i} replaced by {@code ch}.
+     * When that character already equals {@code ch}, {@code s} itself is returned.
+     */
+    public static @NotNull String replaceCharAt(@NotNull String s, int i, char ch) {
+        if (s.charAt(i) == ch)
+            return s;
+
+        var replaced = new StringBuilder(s);
+        replaced.setCharAt(i, ch);
+        return replaced.toString();
+    }
+
+    /**
+     * Returns a copy of {@code word} in which the characters at indices {@code i} and
+     * {@code i + 1} are both replaced by {@code to}.
+     */
+    public static @NotNull String replaceTwoChars(@NotNull String word, int i, char to) {
+        char[] replaced = word.toCharArray();
+        for (int k = i; k <= i + 1; k++)
+            replaced[k] = to;
+
+        return String.valueOf(replaced);
+    }
+
+    /**
+     * Returns whether {@code ch} equals, ignoring case, the character just before index
+     * {@code i} or the character at index {@code i} of {@code s}. A side that falls
+     * outside the string is not checked.
+     */
+    public static boolean containsAdjacentCharacterIgnoringCase(@NotNull String s, int i, char ch) {
+        if (i > 0 && equalsIgnoreCase(ch, s.charAt(i - 1)))
+            return true;
+
+        return i < s.length() && equalsIgnoreCase(ch, s.charAt(i));
+    }
+
+    /**
+     * Returns whether {@code ch} occurs in {@code word} at an index from {@code start}
+     * (inclusive) up to {@code end} (exclusive).
+     */
+    public static boolean containsInSubstring(@NotNull String word, int start, int end, char ch) {
+        int firstHit = word.indexOf(ch, start);
+        if (firstHit == -1)
+            return false;
+
+        return firstHit < end;
+    }
+
public static @NotNull String removeLeadingAndTrailing(@NotNull String s, char c) {
if (s.isEmpty() || s.length() == 1 && s.charAt(0) == c)
return "";
diff --git a/src/test/java/fi/evident/raudikko/integration/SpellingSuggesterTest.java b/src/test/java/fi/evident/raudikko/integration/SpellingSuggesterTest.java
new file mode 100644
index 0000000..4b712b8
--- /dev/null
+++ b/src/test/java/fi/evident/raudikko/integration/SpellingSuggesterTest.java
@@ -0,0 +1,93 @@
+/*
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * https://www.mozilla.org/en-US/MPL/2.0/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Libvoikko: Library of natural language processing tools.
+ * The Initial Developer of the Original Code is Harri Pitkänen .
+ * Portions created by the Initial Developer are Copyright (C) 2012
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Raudikko, the Java port of the Initial Code is Copyright (C) 2020 by
+ * Evident Solutions Oy. All Rights Reserved.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ */
+
+package fi.evident.raudikko.integration;
+
+import fi.evident.raudikko.Morphology;
+import fi.evident.raudikko.SpellingSuggester;
+import org.jetbrains.annotations.NotNull;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.TestInstance;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.MethodSource;
+import org.opentest4j.TestAbortedException;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Pattern;
+
+import static fi.evident.raudikko.test.ResourceUtils.readLines;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.fail;
+import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS;
+
+/**
+ * Runs the bundled morphology's spelling suggester against the cases listed in
+ * {@code typing-error-suggester-test.txt}.
+ *
+ * <p>Each line has the form {@code word:[suggestion1;suggestion2;...]}. A line that starts
+ * with {@code #} marks a case that is expected to fail: the test is reported as aborted
+ * while it fails, and as a failure once it unexpectedly passes.
+ */
+@TestInstance(PER_CLASS)
+class SpellingSuggesterTest {
+
+    private static final @NotNull Pattern TEST_PATTERN = Pattern.compile("(.+):\\[(.*)]");
+    private SpellingSuggester suggester;
+
+    @BeforeAll
+    void setup() {
+        suggester = Morphology.loadBundled().newSpellingSuggester();
+    }
+
+    @ParameterizedTest(name = "{0}")
+    @MethodSource("testData")
+    void testSuggestions(@NotNull String line) {
+        var ignored = line.startsWith("#");
+        if (ignored)
+            line = line.substring(1);
+
+        var m = TEST_PATTERN.matcher(line);
+        if (!m.matches())
+            fail("Invalid line '" + line + "'");
+
+        var word = m.group(1);
+        var suggestions = suggester.provideSpellingSuggestions(word);
+        // An empty bracket pair splits into a single empty string, hence the filter.
+        var expected = Arrays.stream(m.group(2).split(";")).filter(s -> !s.isEmpty()).toList();
+
+        if (!ignored) {
+            assertEquals(expected, suggestions, "word: " + word);
+        } else {
+            if (expected.equals(suggestions))
+                fail("PASSED test for ignored word " + word);
+            else
+                throw new TestAbortedException("ignored word " + word);
+        }
+    }
+
+    private static @NotNull List<String> testData() throws IOException {
+        return readLines("typing-error-suggester-test.txt");
+    }
+}
diff --git a/src/test/java/fi/evident/raudikko/internal/suggestions/SpellCheckerTest.java b/src/test/java/fi/evident/raudikko/internal/suggestions/SpellCheckerTest.java
new file mode 100644
index 0000000..be48fd2
--- /dev/null
+++ b/src/test/java/fi/evident/raudikko/internal/suggestions/SpellCheckerTest.java
@@ -0,0 +1,72 @@
+/*
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * https://www.mozilla.org/en-US/MPL/2.0/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Libvoikko: Library of natural language processing tools.
+ * The Initial Developer of the Original Code is Harri Pitkänen .
+ * Portions created by the Initial Developer are Copyright (C) 2012
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Raudikko, the Java port of the Initial Code is Copyright (C) 2020 by
+ * Evident Solutions Oy. All Rights Reserved.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ */
+
+package fi.evident.raudikko.internal.suggestions;
+
+import fi.evident.raudikko.Morphology;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInstance;
+
+import static org.junit.jupiter.api.Assertions.*;
+import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS;
+
+/**
+ * Tests {@code SpellChecker} against an analyzer created from the bundled morphology.
+ */
+@TestInstance(PER_CLASS)
+class SpellCheckerTest {
+
+ private SpellChecker spellChecker;
+
+ @BeforeAll
+ void setup() {
+ var morphology = Morphology.loadBundled();
+ spellChecker = new SpellChecker(morphology.newAnalyzer());
+ }
+
+ // A word that is not recognised yields null.
+ @Test
+ void unknownWord() {
+ assertNull(spellChecker.spellCheck("an-english-phrase-is-not-a-finnish-word"));
+ }
+
+ // A correctly written word comes back unchanged.
+ @Test
+ void knownWordCapitalizedCorrectly() {
+ var result = spellChecker.spellCheck("kissa");
+ assertNotNull(result);
+ assertEquals("kissa", result.word());
+ }
+
+ // A known word with the wrong capitalization comes back in its capitalized form.
+ @Test
+ void knownWordCapitalizedIncorrectly() {
+ var result = spellChecker.spellCheck("helsinki");
+ assertNotNull(result);
+ assertEquals("Helsinki", result.word());
+ }
+}
diff --git a/src/test/java/fi/evident/raudikko/internal/suggestions/SuggestionGeneratorsTest.java b/src/test/java/fi/evident/raudikko/internal/suggestions/SuggestionGeneratorsTest.java
new file mode 100644
index 0000000..2dc95e1
--- /dev/null
+++ b/src/test/java/fi/evident/raudikko/internal/suggestions/SuggestionGeneratorsTest.java
@@ -0,0 +1,258 @@
+/*
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * https://www.mozilla.org/en-US/MPL/2.0/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Libvoikko: Library of natural language processing tools.
+ * The Initial Developer of the Original Code is Harri Pitkänen .
+ * Portions created by the Initial Developer are Copyright (C) 2012
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Raudikko, the Java port of the Initial Code is Copyright (C) 2020 by
+ * Evident Solutions Oy. All Rights Reserved.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ */
+
+package fi.evident.raudikko.internal.suggestions;
+
+import org.junit.jupiter.api.Nested;
+import org.junit.jupiter.api.Test;
+
+import java.util.List;
+
+import static fi.evident.raudikko.internal.suggestions.Replacements.REPLACEMENTS_1;
+import static fi.evident.raudikko.internal.suggestions.Replacements.REPLACEMENTS_1_FULL;
+import static fi.evident.raudikko.internal.suggestions.SuggestionGenerators.*;
+import static java.util.Arrays.asList;
+import static java.util.Collections.emptyList;
+import static java.util.Collections.singletonList;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+@SuppressWarnings("SpellCheckingInspection")
+class SuggestionGeneratorsTest {
+
+ // Tests for the delete generator.
+ @Nested
+ class Delete {
+
+ @Test
+ void suggestions() {
+ assertEquals(emptyList(), delete("").toList());
+ // Removing either 'l' of "hello" gives "helo", which is expected only once.
+ assertEquals(asList("ello", "hllo", "helo", "hell"), delete("hello").toList());
+ }
+ }
+
+ // Tests for the deleteTwo generator.
+ @Nested
+ class DeleteTwo {
+
+ // Empty and five-character inputs produce no suggestions.
+ @Test
+ void wordsUnderLimit() {
+ assertEquals(emptyList(), deleteTwo("").toList());
+ assertEquals(emptyList(), deleteTwo("hello").toList());
+ assertEquals(emptyList(), deleteTwo("aaaaa").toList());
+ }
+
+ @Test
+ void noConsecutiveDuplicatePairs() {
+ assertEquals(emptyList(), deleteTwo("abcdefgh").toList());
+ assertEquals(emptyList(), deleteTwo("abcdeeefgh").toList());
+ }
+
+ // In "abccccdeeeef" one "cc" pair or one "ee" pair is removed per suggestion.
+ @Test
+ void consecutiveDuplicatePairs() {
+ assertEquals(asList("abccdeeeef", "abccccdeef"), deleteTwo("abccccdeeeef").toList());
+ }
+
+ @Test
+ void nonConsecutiveDuplicatePairs() {
+ assertEquals(emptyList(), deleteTwo("aabbaa").toList());
+ }
+ }
+
+ // Tests for the removeSoftHyphens generator; \u00AD is the soft hyphen character.
+ @Nested
+ class RemoveSoftHyphens {
+
+ @Test
+ void noSoftHyphens() {
+ assertEquals(emptyList(), removeSoftHyphens("").toList());
+ assertEquals(emptyList(), removeSoftHyphens("foo").toList());
+ }
+
+ // All soft hyphens are removed at once, giving a single suggestion.
+ @Test
+ void softHyphensAreRemoved() {
+ assertEquals(singletonList("foobar"), removeSoftHyphens("foo\u00ADbar").toList());
+ assertEquals(singletonList("foobarbaz"), removeSoftHyphens("foo\u00ADbar\u00ADbaz").toList());
+ }
+ }
+
+
+ // Tests for the replace generator with the REPLACEMENTS_1_FULL table.
+ @Nested
+ class Replace {
+
+ // Pins the exact suggestions, and their order, for a long lower-case word.
+ @Test
+ void suggestions() {
+ assertEquals(List.of(
+ "batsaneläkeruokaa", "vstsaneläkeruokaa", "vetsaneläkeruokaa", "varsaneläkeruokaa", "vadsaneläkeruokaa",
+ "vaysaneläkeruokaa", "vatšaneläkeruokaa", "vataaneläkeruokaa", "vatssneläkeruokaa", "vatseneläkeruokaa",
+ "vatsameläkeruokaa", "vatsanrläkeruokaa", "vatsanaläkeruokaa", "vatsanekäkeruokaa", "vatsanelökeruokaa",
+ "vatsaneläleruokaa", "vatsanelägeruokaa", "vatsaneläkrruokaa", "vatsaneläkaruokaa", "vatsaneläkeeuokaa",
+ "vatsaneläketuokaa", "vatsaneläkeriokaa", "vatsaneläkeruikaa", "vatsaneläkeruolaa", "vatsaneläkeruogaa",
+ "vatsaneläkeruoksa", "vatsaneläkeruokea", "vatsaneläkeruokas", "vatsaneläkeruokae"
+ ), replace("vatsaneläkeruokaa", REPLACEMENTS_1_FULL).toList());
+ }
+
+ // An upper-case letter is replaced by upper-case letters ("Foo" -> "Goo", "Doo").
+ @Test
+ void upperCaseReplacements() {
+ assertEquals(List.of("Goo", "Doo", "Fio", "Foi"), replace("Foo", REPLACEMENTS_1_FULL).toList());
+ }
+ }
+
+    /**
+     * Tests for the {@code replaceTwo} generator with the {@code REPLACEMENTS_1} table.
+     */
+    @Nested
+    class ReplaceTwo {
+
+        // Only the doubled letters ("oo", "uu") of the input are replaced, both at once.
+        @Test
+        void suggestions() {
+            assertEquals(emptyList(), replaceTwo("", REPLACEMENTS_1).toList());
+            assertEquals(emptyList(), replaceTwo("bar", REPLACEMENTS_1).toList());
+            assertEquals(asList("fiibarbazquux", "foobarbazqiix"), replaceTwo("foobarbazquux", REPLACEMENTS_1).toList());
+        }
+
+        // A run of six equal letters is treated as three separate pairs.
+        @Test
+        void multipleConsecutiveAreProcessedJustOnce() {
+            assertEquals(asList("iioooo", "ooiioo", "ooooii"), replaceTwo("oooooo", REPLACEMENTS_1).toList());
+        }
+    }
+
+ // Tests for the insertion generator, here with the candidate characters "123".
+ @Nested
+ class Insertion {
+
+ // Each candidate character is inserted at every position, one candidate after another.
+ @Test
+ void suggestions() {
+ assertEquals(List.of(
+ "1foobar", "f1oobar", "fo1obar", "foo1bar", "foob1ar", "fooba1r", "foobar1",
+ "2foobar", "f2oobar", "fo2obar", "foo2bar", "foob2ar", "fooba2r", "foobar2",
+ "3foobar", "f3oobar", "fo3obar", "foo3bar", "foob3ar", "fooba3r", "foobar3"
+ ), insertion("foobar", "123").toList());
+ }
+
+ // Pins the output when the input already contains some of the candidate characters.
+ @Test
+ void suggestionsWhenInsertionCharsAreInInput() {
+ assertEquals(List.of(
+ "1foo12bar", "f1oo12bar", "fo1o12bar", "foo121bar", "foo12b1ar", "foo12ba1r", "foo12bar1",
+ "2foo12bar", "f2oo12bar", "fo2o12bar", "foo212bar", "foo12b2ar", "foo12ba2r", "foo12bar2",
+ "3foo12bar", "f3oo12bar", "fo3o12bar", "foo312bar", "foo132bar", "foo123bar", "foo12b3ar", "foo12ba3r", "foo12bar3"
+ ), insertion("foo12bar", "123").toList());
+ }
+ }
+
+ // Tests for the insertHyphen generator.
+ @Nested
+ class InsertHyphen {
+
+ @Test
+ void testGenerateWithEmptyString() {
+ assertEquals(emptyList(), insertHyphen("").toList());
+ }
+
+ // Hyphens are inserted only with at least two characters on either side.
+ @Test
+ void testGenerateWithStringWithoutHyphen() {
+ assertEquals(List.of("ab-cdefgh", "abc-defgh", "abcd-efgh", "abcde-fgh", "abcdef-gh"), insertHyphen("abcdefgh").toList());
+ }
+
+ // Pins the two suggestions produced for an input that already contains a hyphen.
+ @Test
+ void stringWithHyphen() {
+ assertEquals(List.of("ab-cd-efgh", "abcd-ef-gh"), insertHyphen("abcd-efgh").toList());
+ }
+
+ @Test
+ void testGenerateWithStringWithSpecialCharacters() {
+ assertEquals(emptyList(), insertHyphen("a-b-'c").toList());
+ }
+
+ }
+
+ // Tests for the duplicateCharacters generator.
+ @Nested
+ class DuplicateCharacters {
+
+ @Test
+ void testGenerateWithEmptyString() {
+ assertEquals(emptyList(), duplicateCharacters("").toList());
+ }
+
+ // Every character is doubled in turn, from left to right.
+ @Test
+ void testGenerateWithStringWithoutHyphen() {
+ assertEquals(List.of("aabcdefgh", "abbcdefgh", "abccdefgh", "abcddefgh", "abcdeefgh", "abcdeffgh", "abcdefggh", "abcdefghh"),
+ duplicateCharacters("abcdefgh").toList());
+ }
+
+ // The hyphen itself is never doubled.
+ @Test
+ void stringWithHyphen() {
+ assertEquals(List.of("aabcd-efgh", "abbcd-efgh", "abccd-efgh", "abcdd-efgh", "abcd-eefgh", "abcd-effgh", "abcd-efggh", "abcd-efghh"),
+ duplicateCharacters("abcd-efgh").toList());
+ }
+
+ // Neither hyphens nor the apostrophe are doubled.
+ @Test
+ void testGenerateWithStringWithSpecialCharacters() {
+ assertEquals(List.of("aa-b-'c", "a-bb-'c", "a-b-'cc"), duplicateCharacters("a-b-'c").toList());
+ }
+ }
+
+ // Tests for the swap generator.
+ @Nested
+ class Swap {
+
+ // Pins the exact suggestions, and their order, for "foobarbaz".
+ @Test
+ void suggestions() {
+ assertEquals(asList(
+ "ofobarbaz", "oofbarbaz", "boofarbaz", "aoobfrbaz", "roobafbaz",
+ "fooabrbaz", "foorabbaz", "fooaarbbz", "foozarbab", "foobabraz",
+ "foobaabrz", "foobazbar", "foobarabz", "foobarzab"
+ ), swap("foobarbaz").toList());
+ }
+ }
+
+ // Tests for the vowelChange generator.
+ @Nested
+ class VowelChange {
+
+ // A word with n front/back vowels yields 2^n - 1 variants: 7, 31 and 3 in the cases below.
+ @Test
+ void suggestions() {
+ assertEquals(
+ asList("hamähäkki", "hämahäkki", "hamahäkki", "hämähakki", "hamähakki", "hämahakki", "hamahakki"),
+ vowelChange("hämähäkki").toList());
+
+ assertEquals(
+ asList(
+ "äamuyö", "aämuyö", "äämuyö", "aamyyö", "äamyyö", "aämyyö", "äämyyö", "aamuuö",
+ "äamuuö", "aämuuö", "äämuuö", "aamyuö", "äamyuö", "aämyuö", "äämyuö", "aamuyo",
+ "äamuyo", "aämuyo", "äämuyo", "aamyyo", "äamyyo", "aämyyo", "äämyyo", "aamuuo",
+ "äamuuo", "aämuuo", "äämuuo", "aamyuo", "äamyuo", "aämyuo", "äämyuo"),
+ vowelChange("aamuyö").toList());
+
+ assertEquals(asList("öy", "ou", "öu"), vowelChange("oy").toList());
+ }
+ }
+
+ // Tests for the splitWord generator.
+ @Nested
+ class SplitWord {
+
+ // Split points run from near the end of the word towards its start.
+ @Test
+ void suggestions() {
+ assertEquals(asList("foobarb az", "foobar baz", "fooba rbaz", "foob arbaz", "foo barbaz"), splitWord("foobarbaz").map(s -> s.word1() + " " + s.word2()).toList());
+ }
+ }
+}
diff --git a/src/test/java/fi/evident/raudikko/internal/utils/CharUtilsTest.java b/src/test/java/fi/evident/raudikko/internal/utils/CharUtilsTest.java
new file mode 100644
index 0000000..cf1cc92
--- /dev/null
+++ b/src/test/java/fi/evident/raudikko/internal/utils/CharUtilsTest.java
@@ -0,0 +1,78 @@
+/*
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 2.0 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * https://www.mozilla.org/en-US/MPL/2.0/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is Libvoikko: Library of natural language processing tools.
+ * The Initial Developer of the Original Code is Harri Pitkänen .
+ * Portions created by the Initial Developer are Copyright (C) 2012
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Raudikko, the Java port of the Initial Code is Copyright (C) 2020 by
+ * Evident Solutions Oy. All Rights Reserved.
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ */
+
+package fi.evident.raudikko.internal.utils;
+
+import org.junit.jupiter.api.Test;
+
+import static fi.evident.raudikko.internal.utils.CharUtils.convertVowelBetweenFrontAndBack;
+import static fi.evident.raudikko.internal.utils.CharUtils.equalsIgnoreCase;
+import static org.junit.jupiter.api.Assertions.*;
+
+/**
+ * Tests for the static helpers in {@code CharUtils}.
+ */
+class CharUtilsTest {
+
+ // Back vowels map to their front counterparts, in both cases.
+ @Test
+ void testConvertBackVowelsToFront() {
+ assertEquals('ä', convertVowelBetweenFrontAndBack('a'));
+ assertEquals('ö', convertVowelBetweenFrontAndBack('o'));
+ assertEquals('y', convertVowelBetweenFrontAndBack('u'));
+ assertEquals('Ä', convertVowelBetweenFrontAndBack('A'));
+ assertEquals('Ö', convertVowelBetweenFrontAndBack('O'));
+ assertEquals('Y', convertVowelBetweenFrontAndBack('U'));
+ }
+
+ // Front vowels map to their back counterparts, in both cases.
+ @Test
+ void testConvertFrontVowelsToBack() {
+ assertEquals('a', convertVowelBetweenFrontAndBack('ä'));
+ assertEquals('o', convertVowelBetweenFrontAndBack('ö'));
+ assertEquals('u', convertVowelBetweenFrontAndBack('y'));
+ assertEquals('A', convertVowelBetweenFrontAndBack('Ä'));
+ assertEquals('O', convertVowelBetweenFrontAndBack('Ö'));
+ assertEquals('U', convertVowelBetweenFrontAndBack('Y'));
+ }
+
+ // Characters without a front/back counterpart are returned unchanged.
+ @Test
+ void testNonVowelCharacters() {
+ assertEquals('b', convertVowelBetweenFrontAndBack('b'));
+ assertEquals('C', convertVowelBetweenFrontAndBack('C'));
+ assertEquals('1', convertVowelBetweenFrontAndBack('1'));
+ assertEquals('@', convertVowelBetweenFrontAndBack('@'));
+ }
+
+
+ @Test
+ void testEqualsIgnoreCase() {
+ assertTrue(equalsIgnoreCase('a', 'a'));
+ assertTrue(equalsIgnoreCase('A', 'a'));
+ assertFalse(equalsIgnoreCase('A', 'b'));
+ }
+}
diff --git a/src/test/java/fi/evident/raudikko/internal/utils/StringUtilsTest.java b/src/test/java/fi/evident/raudikko/internal/utils/StringUtilsTest.java
index 9aad3bd..dd78bf2 100644
--- a/src/test/java/fi/evident/raudikko/internal/utils/StringUtilsTest.java
+++ b/src/test/java/fi/evident/raudikko/internal/utils/StringUtilsTest.java
@@ -34,14 +34,13 @@
import org.junit.jupiter.api.Test;
-import static fi.evident.raudikko.internal.utils.StringUtils.matchesAt;
-import static org.junit.jupiter.api.Assertions.assertFalse;
-import static org.junit.jupiter.api.Assertions.assertTrue;
+import static fi.evident.raudikko.internal.utils.StringUtils.*;
+import static org.junit.jupiter.api.Assertions.*;
class StringUtilsTest {
@Test
- void verifyMatchesAt() {
+ void testMatchesAt() {
assertTrue(matchesAt("foo", 0, "foo"));
assertTrue(matchesAt("foobar", 0, "foo"));
assertFalse(matchesAt("foobar", 1, "foo"));
@@ -53,4 +52,136 @@ void verifyMatchesAt() {
assertFalse(matchesAt("foobar", -1, "ofoo"));
}
+
+ @Test
+ void testWithoutChar() { // withoutChar is expected to drop every occurrence of the given character.
+ assertEquals("helloworld", withoutChar("hello world", ' '));
+ assertEquals("hello world", withoutChar("hello world", 'x')); // absent character: string comes back unchanged
+ }
+
+ @Test
+ void testCountOccurrences() { // countOccurrences is expected to return how many times the character appears.
+ assertEquals(3, countOccurrences("hello world", 'l'));
+ assertEquals(0, countOccurrences("hello world", 'x')); // absent character counts as zero
+ }
+
+ @Test
+ void testEndsWithChar() { // endsWithChar is expected to compare against the last character only.
+ assertTrue(endsWithChar("hello", 'o'));
+ assertFalse(endsWithChar("hello", 'l')); // 'l' occurs in the word but is not its last character
+ }
+
+ @Test
+ void testStartsWithChar() { // startsWithChar is expected to compare against the first character only.
+ assertTrue(startsWithChar("hello", 'h'));
+ assertFalse(startsWithChar("hello", 'e')); // 'e' is the second character, not the first
+ }
+
+ @Test
+ void testCapitalizeIfLower() { // An all-lower-case word is expected to get an upper-case first letter.
+ assertEquals("Abc", capitalizeIfLower("abc"));
+ assertEquals("aBc", capitalizeIfLower("aBc")); // mixed-case input is expected to come back untouched
+ }
+
+ @Test
+ void testIsAllUpper() { // isAllUpper is expected to hold only when no lower-case letter is present.
+ assertTrue(isAllUpper("HELLO"));
+ assertFalse(isAllUpper("Hello")); // capitalised word still contains lower-case letters
+ }
+
+ @Test
+ void testIsAllLower() { // isAllLower is expected to hold only when no upper-case letter is present.
+ assertTrue(isAllLower("hello"));
+ assertFalse(isAllLower("Hello")); // a single upper-case letter is enough to fail
+ }
+
+ @Test
+ void testDecapitalize() { // decapitalize is expected to lower-case the first letter.
+ assertEquals("hello", decapitalize("Hello"));
+ assertEquals("hello", decapitalize("hello")); // already lower-case input comes back unchanged
+ }
+
+ @Test
+ void testRemoveRange() { // removeRange is expected to cut out the characters from the start index up to, but excluding, the end index.
+ assertEquals("hello world", removeRange("hello cruel world", 6, 12)); // indices 6..11 hold "cruel "
+ assertEquals("hello world", removeRange("hello world", 6, 6)); // empty range removes nothing
+ }
+
+ @Test
+ void testContains() { // contains is expected to report whether the character occurs anywhere in the string.
+ assertTrue(contains("hello world", 'h'));
+ assertFalse(contains("hello world", 'x'));
+ }
+
+ @Test
+ void testIndexOf() { // Pins indexOf with and without a start index: first match at or after the start, -1 when there is none.
+ assertEquals(0, indexOf("hello world", 'h'));
+ assertEquals(0, indexOf("hello world", 'h', 0)); // the start index itself is included in the search
+ assertEquals(-1, indexOf("hello world", 'h', 1)); // the only 'h' lies before the start index
+ assertEquals(-1, indexOf("hello world", 'h', 10)); // still absent when the search starts at the last character
+
+ assertEquals(4, indexOf("hello world", 'o'));
+ assertEquals(4, indexOf("hello world", 'o', 4)); // starting exactly on the first 'o'
+ assertEquals(7, indexOf("hello world", 'o', 5)); // past the first 'o', so the second one is found
+ assertEquals(7, indexOf("hello world", 'o', 6));
+ assertEquals(7, indexOf("hello world", 'o', 7)); // starting exactly on the second 'o'
+ assertEquals(-1, indexOf("hello world", 'o', 8)); // no 'o' remains after index 7
+
+ assertEquals(-1, indexOf("hello world", 'x')); // character that never occurs
+ assertEquals(-1, indexOf("hello world", 'x', 4));
+ }
+
+ @Test
+ void testSwap() { // swap is expected to exchange the characters at the two given indices.
+ assertEquals("hlelow", swap("hellow", 1, 2)); // 'e' (index 1) and 'l' (index 2) trade places
+ assertEquals("hellow", swap("hellow", 2, 2)); // swapping an index with itself changes nothing
+ }
+
+ @Test
+ void testReplaceTwoChars() { // The character at the index and the one after it are both expected to become the replacement.
+ assertEquals("xx", replaceTwoChars("ab", 0, 'x')); // the pair covers the whole string
+ assertEquals("helloxxorld", replaceTwoChars("hello world", 5, 'x')); // indices 5 and 6 (' ' and 'w') are replaced
+ }
+
+ @Test
+ void testContainsInSubstring() { // The search is expected to be confined to the given index range.
+ assertTrue(containsInSubstring("hello world", 0, 5, 'e'));
+
+ assertFalse(containsInSubstring("hello world", 0, 5, 'w')); // 'w' sits at index 6, beyond the range
+ assertFalse(containsInSubstring("hello world", 1, 5, 'h')); // 'h' sits at index 0, before the range
+ }
+
+ @Test
+ void testContainsAdjacentCharacter() { // The expectations imply that the characters at index-1 and index (where present) count as adjacent.
+ assertTrue(containsAdjacentCharacterIgnoringCase("hello", 0, 'h')); // index 0 has no left neighbour; 'h' is at index 0
+ assertTrue(containsAdjacentCharacterIgnoringCase("hello", 1, 'h'));
+ assertTrue(containsAdjacentCharacterIgnoringCase("hello", 1, 'e'));
+ assertTrue(containsAdjacentCharacterIgnoringCase("hello", 2, 'e'));
+ assertTrue(containsAdjacentCharacterIgnoringCase("hello", 2, 'l'));
+ assertTrue(containsAdjacentCharacterIgnoringCase("hello", 3, 'l'));
+ assertTrue(containsAdjacentCharacterIgnoringCase("hello", 4, 'l'));
+ assertTrue(containsAdjacentCharacterIgnoringCase("hello", 4, 'o'));
+ assertTrue(containsAdjacentCharacterIgnoringCase("hello", 5, 'o')); // index 5 equals the length; only the left neighbour 'o' exists
+
+ assertFalse(containsAdjacentCharacterIgnoringCase("hello", 0, 'e')); // 'e' is at index 1, which is not a neighbour of index 0
+ assertFalse(containsAdjacentCharacterIgnoringCase("hello", 1, 'o'));
+ assertFalse(containsAdjacentCharacterIgnoringCase("hello", 1, 'x'));
+ assertFalse(containsAdjacentCharacterIgnoringCase("hello", 5, 'x'));
+
+ assertTrue(containsAdjacentCharacterIgnoringCase("hello", 1, 'E')); // the match is expected to ignore case
+ }
+
+ @Test
+ void testReplaceCharAt() { // replaceCharAt is expected to substitute the single character at the given index.
+ assertEquals("hallo", replaceCharAt("hello", 1, 'a'));
+ assertEquals("hello", replaceCharAt("hello", 1, 'e')); // replacing a character with itself yields an equal string
+ }
+
+ @Test
+ void testCharIndices() { // charIndices(...).toArray() is expected to list every index of the character in ascending order.
+ assertArrayEquals(new int[]{0}, charIndices("foo", 'f').toArray());
+ assertArrayEquals(new int[]{1,2}, charIndices("foo", 'o').toArray()); // consecutive occurrences are both reported
+ assertArrayEquals(new int[]{2, 3, 9}, charIndices("hello world", 'l').toArray());
+ assertArrayEquals(new int[]{}, charIndices("hello world", 'x').toArray()); // absent character gives an empty array
+ }
}
diff --git a/src/test/resources/typing-error-suggester-test.txt b/src/test/resources/typing-error-suggester-test.txt
new file mode 100644
index 0000000..a69f4ad
--- /dev/null
+++ b/src/test/resources/typing-error-suggester-test.txt
@@ -0,0 +1,227 @@
+Asuessadn:[Asuessaan]
+ulkomaibla:[ulkomaila]
+Suomec:[]
+kansalaismn:[]
+yenkilö-:[henkilö-]
+ma:[mA;mä;me;maa;oma]
+osoitatiedot:[osoita tiedot;osoitetiedot]
+sgilyvät:[]
+ajantasaisinm:[ajantasaisin]
+Submessa:[]
+vagn,:[]
+uos:[uros;uis;jos;ulos]
+häf:[]
+vtse:[tse]
+huolehtzi:[huolehti]
+meuttuneiden:[]
+tiesojensa:[riesojensa;tirsojensa;sietojensa;tierojensa;tiesomensa]
+ilmomttamisesta:[]
+Suomees.:[]
+Ilmoimus:[]
+mucttuneista:[]
+tieyoista:[tietoista;tieroista;tiejoista]
+(nm.:[]
+avioliitto,:[avioliitto]
+avioeto,:[]
+lasteb:[lasten]
+synthmä):[]
+lghetetään:[]
+iigi-:[III-]
+jp:[jo;p;j;jpg;IP]
+väestötoetoviraston,:[]
+Pietarwaaren:[Pietarsaaren]
+toimipaikkjan,:[]
+Pb:[B;Pub;P;pH]
+26s:[26]
+68k01:[6801]
+Pietarsxari,:[]
+aai:[aasi;aari;sai;ai;AA]
+lähimpäkn:[lähimpäin]
+Sunmen:[]
+edustuotoon:[edustuototon;edustuoton;edusruotoon;edustotoon;edustuottoon]
+nykyqsessä:[]
+asuinmaassax:[asuinmaassa]
+Klselyjä:[]
+henkilöbietomuutoksista:[]
+vii:[vii;voi;viti;Vik;viis]
+lähetgää:[lähetkää;lähettää]
+syhköpostiosoitteeseen::[]
+internationai@dvv.fi:[]
+Osoitemuutoeset:[]
+stn:[sen;snt;sun]
+sijaab:[sijaan;sijaa;sija ab;sija-ab]
+homdetaan:[hohdetaan]
+Ulkomailga:[Ulkomaila;Ulkomailta]
+#asuian:[asian;asuin;asujan;astian;asuihan]
+Suomev:[]
+kansalaisgn:[]
+ilmoxtus:[]
+osoitteenmuutoksebta:[]
+llomakkeella:[lomakkeella]
+tar:[tae;taru;taro]
+lomtke.fin:[]
+sähköiseklä:[sähköisellä]
+lomakkxella.:[]
+Kcn:[]
+tijdot:[]
+väestötietojärhestelmässä:[väestötietojärjestelmässä]
+ovdt:[ovet;ovat]
+akan:[akan;ekan;akana;alan;takan]
+tmsalla,:[]
+oc:[c;o;pc;OK]
+esimerkiksh:[]
+yassin:[tassin;Hassin]
+saamiren:[saamien;saamisen]
+nopenmpaa.:[]
+Asuwessaan:[Asuessaan]
+ualkomailla:[ulkomailla;halkomailla;valkomailla]
+Suomepn:[Suomen;Suomein;Suomepin]
+kanesalaisen:[kansalaisen;kanasalaisen;kannesalaisen;aknesalaisen;sanekalaisen]
+hsenkilö-:[henkilö-]
+dja:[dia;oja;ja;d:ja;aja]
+osoiwtetiedot:[osoitetiedot]
+säjilyvät:[säilyvät]
+#ajantasailsina:[ajantassilaina;ajantasaisina;ajantasalisina]
+Suomeissa:[Suomissa;Suomessa;Suoneissa;Suomiessa;Someissa]
+vaine,:[]
+jhos:[jos]
+hpän:[hän;hään]
+yitse:[itse;ylitse]
+huoljehtii:[huolehtii]
+muuttunehiden:[muuttuneiden]
+tinetojensa:[tietojensa]
+ilmoittamnisesta:[ilmoittamisesta]
+Suomeben.:[]
+Iljmoitus:[Ilmoitus]
+mduuttuneista:[muuttuneista]
+#tiedoiista:[tiedoista;tiedioista]
+s(mm.:[]
+avioliittio,:[]
+avuioero,:[]
+lasften:[lasten]
+dsyntymä):[]
+läheteatään:[lähetetään]
+Digwi-:[Digi-]
+jda:[ja;Ida;jaa]
+väestötietovirastoon,:[väestötietovirastoon]
+Pietasrsaaren:[Pietarsaaren]
+toigmipaikkaan,:[]
+PbL:[]
+26x,:[]
+FwI-:[FI-]
+68g601:[68601]
+Pietarsaaqri,:[]
+tadi:[stadi;tai]
+lähiempään:[lähempään;lähimpään;lähiemiään;lähiemään;lähiemopään]
+Suwomen:[Suomen]
+eduwstustoon:[edustustoon]
+nykybisessä:[nykyisessä;nykybissessä]
+asuinmaassak.:[]
+Kyszelyjä:[Kyselyjä]
+xhenkilötietomuutoksista:[henkilötietomuutoksista]
+vozi:[voi]
+lähettyää:[lähettypä;lähettää;lähettyä;lähettyään;lähettyäpä]
+sähköpostioscoitteeseen::[]
+internationgal@dvv.fi:[]
+Osoitemuutokpset:[Osoitemuutokset]
+saen:[säen;sane;sen;saan;saken]
+sinjaan:[sijaan;Sonjaan;Senjaan]
+hsoidetaan:[hoidetaan]
+Ulkomyailla:[Ulkomailla]
+asduvan:[astuvan;asuvan]
+Suomten:[Suomen;Suonten;Suomeen]
+kaensalaisen:[kansalaisen;käensalaisen;käensäläisen;laensalaisen]
+ilmyoitus:[ilmoitus]
+osoitwteenmuutoksesta:[osoitteenmuutoksesta]
+-lomakkevella:[-lomakkeella]
+taui:[tau;tauti;tausi;tui;tauni]
+llomake.fin:[]
+sähköiseqllä:[sähköisellä]
+lomaskkeella.:[]
+Kgun:[Kun]
+tiledot:[tiedot]
+väestötietojälrjestelmässä:[väestötietojärjestelmässä]
+ocvat:[ovat]
+atjan:[ajan;vatjan;Anjat;Arjan;patjan]
+tasaplla,:[]
+ofn:[on]
+esdimerkiksi:[esimerkiksi]
+#pasusin:[passin;paussin;pusasin;pasurin;Pa susin]
+slaaminen:[saaminen;salaaminen;silaaminen;selaaminen]
+nopeaumpaa.:[]
+suessaan:[Suessaan;asuessaan;osuessaan;suassaan;suvessaan]
+ulkomaill:[ulkomalli;ulkomailla;ulkomaille]
+Suoen:[Suen;Suomen;Suo en;Suon;Suonen]
+kansalasen:[kansalaisen;kansalaen;kansalasien;kansalasein;kansalasten]
+henkilö:[henkilö;henkikö;henkilöt;henkilöi;henkilön]
+j:[]
+osoietiedot:[osoitetiedot]
+#säilyät:[säilät;säilymät;säilyvät;säilyt;säilyjät]
+ajantaaisina:[ajantamaisina;ajantasaisina;ajantanaisina;ajantakaisina;ajantapaisina]
+#Suomssa:[Suomissa;Suomessa;Suomassa;Suossa;Sumossa]
+vain:[vain;van;avain;vaon;vein]
+os:[oas;osa;s;o;ois]
+#hä:[hän;ha;ä;h;he]
+tse:[tse;tae;se;te;tase]
+huoletii:[huoleti;huolehtii;huolet ii;huolitie]
+muuttneiden:[muuttaneiden;muuttuneiden]
+tetojensa:[tietojensa;tetrojensa;fetojensa;Terojensa;Teojensa]
+ilmoittamiseta:[ilmoittamista;ilmoittamiset;ilmoittamisetta;ilmoittamisesta;ilmoittamiselta]
+Suomen.:[Suomen]
+lmoitus:[ilmoitus;lomitus]
+muuttneista:[muuttaneista;muuttuneista]
+tiedista:[tiedusta;tiedosta;tiedoista;tiediasta]
+mm.:[mm.;mm;kk.]
+aviolitto,:[]
+aviero,:[]
+laten:[latan;Laden;laen;latten;lasten]
+synymä):[]
+läheetään:[lähetään;lähetetään;lähdetään]
+Digi:[]
+j:[]
+väestötietoiraston,:[]
+Pietasaaren:[Pietarsaaren;Pirtasaaren;Pietisaaren]
+timipaikkaan,:[]
+L:[]
+6,:[6]
+F-:[F-;NF-;MF-;G-;PF-]
+6860:[6860;860;660;680;686]
+Pietarsaar,:[]
+ai:[ai;aie;ei;i;a]
+ähimpään:[lähimpään;vähimpään]
+Sumen:[Suen;Suomen;Suman;Sumein;Sumean]
+edutustoon:[edustustoon]
+nykyisesä:[nykyisensä;nykyisessä;nykyisestä;nykyiskesä;nykyispesä]
+asuinmaassa:[asuinmaassa;asuinmassa;asuin maassa;astuinmaassa;Asunmaassa]
+Kyelyjä:[Kylyjä;Kyselyjä]
+enkilötietomuutoksista:[henkilötietomuutoksista]
+oi:[oi;ii;oo;i;koi]
+lähetää:[lähetä;lähettää;lähentää;lähetään;lähetkää]
+sähköpostiosotteeseen::[]
+nternational@dvv.fi:[]
+Osotemuutokset:[Ositemuutokset;Osoitemuutokset;Odotemuutokset;Osaotemuutokset]
+sn:[n;s;sen;San;snt]
+sijan:[sijan;sija;sian;sijani;Aijan]
+hoidetan:[hoidatan;hoideta;hoidetaan]
+Ukomailla:[Ukonmailla;Ukkomailla;Sukomailla;Uskomailla;Ulkomailla]
+auvan:[sauvan;aivan;asuvan;vauvan;hauvan]
+Suoen:[Suen;Suomen;Suo en;Suon;Suonen]
+kansaaisen:[kansalaisen;kansamaisen;kansanaisen]
+ilmoitu:[ilmoitus;Ilmo itu]
+osoitteenmuutosesta:[osoitteenkuutosesta;osoitteenmuutoseste;osoitteenmuutoksesta;osoitteenmuutosestoa]
+-lomakeella:[-lomake Ella;-lomakkeella;-lomakehella;-lomake-Ella]
+ti:[te;i;tie;t;tiu]
+lomake.fn:[]
+sähköisell:[sähköisellä;sähköiselle]
+lomakeella.:[]
+Ku:[U;Kuu;Aku;K;Kun]
+#tieot:[teot;tiet;tieto;tiedot;lieot]
+väestötietoärjestelmässä:[väestötietojärjestelmässä]
+ova:[iva;oiva;nova;kova;ovi]
+aan:[San;alan;saan;akan;aah]
+tasall,:[]
+o:[]
+esierkiksi:[esimerkiksi;esiarkiksi;esieriksi;esirekiksi;esiherkiksi]
+passi:[passi;pässi;paasi;apassi;Pessi]
+saamine:[saamine;saameni;saamien;saaminen;saamina]
+noeampaa.:[]