diff --git a/CHANGELOG.md b/CHANGELOG.md index 2845770..fc5f09b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ### Changes +- Add a spelling suggester ([#7](https://github.com/EvidentSolutions/raudikko/pull/7)) - Add enums for fields of Analysis where applicable. Breaking change if you were using the fields. - Bump the minimum supported Java version to 17 - Update morphology to [d3f4a0](https://github.com/voikko/corevoikko/commit/d3f4a065aa89c322f9c2476ea4d777bc4ba9ac6f) diff --git a/src/main/java/fi/evident/raudikko/Morphology.java b/src/main/java/fi/evident/raudikko/Morphology.java index bc1198b..76a5110 100644 --- a/src/main/java/fi/evident/raudikko/Morphology.java +++ b/src/main/java/fi/evident/raudikko/Morphology.java @@ -35,6 +35,7 @@ import fi.evident.raudikko.internal.fst.UnweightedTransducer; import fi.evident.raudikko.internal.fst.UnweightedVfstLoader; import fi.evident.raudikko.internal.morphology.FinnishVfstAnalyzer; +import fi.evident.raudikko.internal.suggestions.DefaultSpellingSuggester; import org.jetbrains.annotations.NotNull; import java.io.IOException; @@ -85,4 +86,14 @@ private Morphology(@NotNull UnweightedTransducer transducer) { public @NotNull Analyzer newAnalyzer(@NotNull AnalyzerConfiguration configuration) { return new FinnishVfstAnalyzer(transducer, configuration); } + + /** + * Creates a new {@link SpellingSuggester} for this morphology. + *

+ * The suggester is a mutable object that can be used repeatedly, but may not be + * shared between threads. + * + * @return a newly created {@link DefaultSpellingSuggester} constructed from this morphology + */ + public @NotNull SpellingSuggester newSpellingSuggester() { + return new DefaultSpellingSuggester(this); + } } diff --git a/src/main/java/fi/evident/raudikko/SpellingSuggester.java b/src/main/java/fi/evident/raudikko/SpellingSuggester.java new file mode 100644 index 0000000..78251a9 --- /dev/null +++ b/src/main/java/fi/evident/raudikko/SpellingSuggester.java @@ -0,0 +1,48 @@ +/* + * The contents of this file are subject to the Mozilla Public License Version + * 2.0 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * https://www.mozilla.org/en-US/MPL/2.0/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Libvoikko: Library of natural language processing tools. + * The Initial Developer of the Original Code is Harri Pitkänen . + * Portions created by the Initial Developer are Copyright (C) 2012 + * the Initial Developer. All Rights Reserved. + * + * Raudikko, the Java port of the Initial Code is Copyright (C) 2020 by + * Evident Solutions Oy. All Rights Reserved. + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above.
If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + */ + +package fi.evident.raudikko; + +import org.jetbrains.annotations.NotNull; + +import java.util.List; + +/** + * Provides suggestions for misspelled words. + * + * Instances are created with {@code Morphology.newSpellingSuggester()}, whose documentation + * states that a suggester can be reused but may not be shared between threads. + */ +public interface SpellingSuggester { + + /** + * Given a word, provides a list of spelling suggestions for it. + * + * NOTE(review): the type argument of the returned List seems to have been lost from this + * listing; presumably the elements are Strings. Confirm against the original file. + * + * @param word the word to find suggestions for + * @return the suggestions for {@code word}; may be empty + */ + @NotNull List provideSpellingSuggestions(@NotNull String word); +} diff --git a/src/main/java/fi/evident/raudikko/internal/suggestions/DefaultSpellingSuggester.java b/src/main/java/fi/evident/raudikko/internal/suggestions/DefaultSpellingSuggester.java new file mode 100644 index 0000000..538e725 --- /dev/null +++ b/src/main/java/fi/evident/raudikko/internal/suggestions/DefaultSpellingSuggester.java @@ -0,0 +1,194 @@ +/* + * The contents of this file are subject to the Mozilla Public License Version + * 2.0 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * https://www.mozilla.org/en-US/MPL/2.0/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Libvoikko: Library of natural language processing tools. + * The Initial Developer of the Original Code is Harri Pitkänen . + * Portions created by the Initial Developer are Copyright (C) 2012 + * the Initial Developer. All Rights Reserved.
+ * + * Raudikko, the Java port of the Initial Code is Copyright (C) 2020 by + * Evident Solutions Oy. All Rights Reserved. + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + */ + +package fi.evident.raudikko.internal.suggestions; + +import fi.evident.raudikko.Analyzer; +import fi.evident.raudikko.AnalyzerConfiguration; +import fi.evident.raudikko.Morphology; +import fi.evident.raudikko.SpellingSuggester; +import fi.evident.raudikko.internal.suggestions.Suggestion.SimpleSuggestion; +import fi.evident.raudikko.internal.suggestions.Suggestion.SplitSuggestion; +import fi.evident.raudikko.internal.utils.StringUtils; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; + +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.BiFunction; +import java.util.function.Function; +import java.util.function.UnaryOperator; +import java.util.stream.Stream; + +import static fi.evident.raudikko.internal.suggestions.Replacements.*; +import static fi.evident.raudikko.internal.utils.StringUtils.isAllUpper; +import static java.lang.Character.isUpperCase; +import static java.util.Collections.emptyList; +import static 
java.util.Comparator.comparing; + +public final class DefaultSpellingSuggester implements SpellingSuggester { + + private final @NotNull SpellChecker spellChecker; + + /** + * How many suggestions are returned to user + */ + private static final int MAX_SUGGESTIONS_RETURNED = 5; + + /** + * How many variations are generated for words? + */ + private static final int MAX_VARIATIONS = 800; + + /** + * Generate more suggestions than required so that sorting gets to pick the best ones + */ + private static final int MAX_SUGGESTIONS_GENERATED = 3 * MAX_SUGGESTIONS_RETURNED; + + private static final int MAX_WORD_SIZE = 255; + private static final @NotNull String COMMON_LETTERS = "aitesn"; + private static final @NotNull String UNCOMMON_LETTERS = "ulkoämrvpyhjdögfbcw:xzqå'."; + + private static final @NotNull List>> primaryGenerators = List.of( + simple(Stream::of), + simple(SuggestionGenerators::removeSoftHyphens) + ); + + private static final @NotNull List>> secondaryGenerators = List.of( + simple(SuggestionGenerators::vowelChange), + simple(SuggestionGenerators::replace, REPLACEMENTS_1_FULL), + simple(SuggestionGenerators::delete), + simple(SuggestionGenerators::insertHyphen), + simple(SuggestionGenerators::duplicateCharacters), + SuggestionGenerators::splitWord, + simple(SuggestionGenerators::replaceTwo, REPLACEMENTS_1), + simple(SuggestionGenerators::replace, REPLACEMENTS_2_FULL), + simple(SuggestionGenerators::insertion, COMMON_LETTERS), + simple(SuggestionGenerators::swap), + simple(SuggestionGenerators::replace, REPLACEMENTS_3_FULL), + simple(SuggestionGenerators::insertion, UNCOMMON_LETTERS), + simple(SuggestionGenerators::replace, REPLACEMENTS_4_FULL), + simple(SuggestionGenerators::replaceTwo, REPLACEMENTS_2), + simple(SuggestionGenerators::replaceTwo, REPLACEMENTS_3), + simple(SuggestionGenerators::replaceTwo, REPLACEMENTS_4), + simple(SuggestionGenerators::deleteTwo), + simple(SuggestionGenerators::replace, REPLACEMENTS_5_FULL) + ); + + public 
DefaultSpellingSuggester(@NotNull Morphology morphology) { + this.spellChecker = new SpellChecker(newAnalyzer(morphology)); + } + + @Override + public @NotNull List provideSpellingSuggestions(@NotNull String word) { + if (word.length() <= 1 || word.length() > MAX_WORD_SIZE) + return emptyList(); + + var capitalizer = capitalizer(word); + + var results1 = generateSuggestions(word, primaryGenerators); + var results2 = generateSuggestions(word, secondaryGenerators); + + return Stream.concat(results1, results2) + .map(capitalizer.compose(WordWithPriority::word)) + .distinct() + .limit(MAX_SUGGESTIONS_RETURNED) + .toList(); + } + + private @NotNull Stream generateSuggestions( + @NotNull String word, + @NotNull List>> generators + ) { + AtomicInteger count = new AtomicInteger(0); // atomicity not really needed, just box for counter + + return generators.stream() + .flatMap(g -> g.apply(word)) + .distinct() + .limit(MAX_VARIATIONS) + .flatMap(s -> Stream.ofNullable(processSuggestion(s, spellChecker))) + .limit(MAX_SUGGESTIONS_GENERATED) + .map(s -> new WordWithPriority(s.word(), s.priority() * (count.getAndIncrement() + 5))) + .sorted(comparing(WordWithPriority::priority)); + } + + private static @Nullable WordWithPriority processSuggestion(@NotNull Suggestion suggestion, @NotNull SpellChecker spellChecker) { + if (suggestion instanceof SimpleSuggestion s) { + return spellChecker.spellCheck(s.word()); + + } else if (suggestion instanceof SplitSuggestion s) { + var s1 = spellChecker.spellCheck(s.word1()); + if (s1 == null) + return null; + + var s2 = spellChecker.spellCheck(s.word2()); + if (s2 == null) + return null; + + return new WordWithPriority(s1.word() + " " + s2.word(), (s1.priority() + s2.priority()) * s.priorityMultiplier()); + } else { + throw new IllegalStateException("unexpected suggestion: " + suggestion); + } + } + + private static @NotNull UnaryOperator capitalizer(@NotNull String word) { + if (isAllUpper(word)) + return String::toUpperCase; + else if 
(isUpperCase(word.charAt(0))) + return StringUtils::capitalizeIfLower; + else + return UnaryOperator.identity(); + } + + private static @NotNull Analyzer newAnalyzer(@NotNull Morphology morphology) { + var config = new AnalyzerConfiguration(); + + config.setIncludeWord(true); + config.setIncludeStructure(true); + config.setIncludeBasicAttributes(true); + config.setIncludeOrganizationNameAnalysis(true); + + config.setIncludeBaseForm(false); + config.setIncludeBaseFormParts(false); + config.setIncludeFstOutput(false); + + return morphology.newAnalyzer(config); + } + + private static @NotNull Function> simple(@NotNull Function> f) { + return w -> f.apply(w).map(SimpleSuggestion::new); + } + + private static @NotNull Function> simple(@NotNull BiFunction> f, T param) { + return w -> f.apply(w, param).map(SimpleSuggestion::new); + } +} diff --git a/src/main/java/fi/evident/raudikko/internal/suggestions/Replacements.java b/src/main/java/fi/evident/raudikko/internal/suggestions/Replacements.java new file mode 100644 index 0000000..e04329f --- /dev/null +++ b/src/main/java/fi/evident/raudikko/internal/suggestions/Replacements.java @@ -0,0 +1,97 @@ +/* + * The contents of this file are subject to the Mozilla Public License Version + * 2.0 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * https://www.mozilla.org/en-US/MPL/2.0/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Libvoikko: Library of natural language processing tools. + * The Initial Developer of the Original Code is Harri Pitkänen . + * Portions created by the Initial Developer are Copyright (C) 2012 + * the Initial Developer. All Rights Reserved. 
+ * + * Raudikko, the Java port of the Initial Code is Copyright (C) 2020 by + * Evident Solutions Oy. All Rights Reserved. + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + */ + +package fi.evident.raudikko.internal.suggestions; + +import fi.evident.raudikko.internal.utils.CharMap; +import fi.evident.raudikko.internal.utils.CollectionUtils; +import org.jetbrains.annotations.NotNull; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; + +import static java.lang.Character.isLowerCase; +import static java.lang.Character.toUpperCase; + +final class Replacements { + + static final @NotNull Replacements REPLACEMENTS_1 = parse(".,asiuiotrtdersšsanmuilkklkgoiäömnrertvbpbpoythjjhjkdtdsdföägfghgkfgfdbpbncvcswewvxczžzxqaåoåpåäåöaeiktyea"); + static final @NotNull Replacements REPLACEMENTS_2 = parse("1q2q2w3w3e4e4r5r5t6t6y7y7u8u8i9i9o0o0p+pie"); + static final @NotNull Replacements REPLACEMENTS_3 = parse("essdnhujlökjopäpmkrdvgplyhhujideölgtfvbvckwaxszaqkåaaåeéaâkcscijxz"); + static final @NotNull Replacements REPLACEMENTS_4 = 
parse("qwqswqwswdedefrfrgtftgthygyjuhukilokolpöpäsesxdrbgfefrftfcgygbgvhyhnhbhgjujmjnkikokmlolpöpöåäåzsxdcdcfcxvfbhnjnbmjewpåaqswszdwdcdxvcawazsq"); + static final @NotNull Replacements REPLACEMENTS_5 = parse("aooaoutlsraieääeuvvuoddokqpvvpqeeqaddarsetteryyrtuutyiiyuoippioåhvvhhmmh"); + static final @NotNull Replacements REPLACEMENTS_1_FULL = REPLACEMENTS_1.extendWithMatchingUpperCaseReplacements(); + static final @NotNull Replacements REPLACEMENTS_2_FULL = REPLACEMENTS_2.extendWithMatchingUpperCaseReplacements(); + static final @NotNull Replacements REPLACEMENTS_3_FULL = REPLACEMENTS_3.extendWithMatchingUpperCaseReplacements(); + static final @NotNull Replacements REPLACEMENTS_4_FULL = REPLACEMENTS_4.extendWithMatchingUpperCaseReplacements(); + static final @NotNull Replacements REPLACEMENTS_5_FULL = REPLACEMENTS_5.extendWithMatchingUpperCaseReplacements(); + + private final @NotNull CharMap replacementMapping; + private static final char @NotNull[] EMPTY_MAPPING = new char[0]; + + private Replacements(@NotNull CharMap replacementMapping) { + this.replacementMapping = replacementMapping; + } + + public char @NotNull [] forCharacter(char from) { + return replacementMapping.getOrDefault(from, EMPTY_MAPPING); + } + + private @NotNull Replacements extendWithMatchingUpperCaseReplacements() { + var newMapping = replacementMapping.copy(); + for (char ch : newMapping.keys()) { + var values = newMapping.get(ch); + if (values != null && isLowerCase(ch)) + newMapping.put(toUpperCase(ch), CollectionUtils.toUpperCase(values)); + } + + return new Replacements(newMapping); + } + + private static @NotNull Replacements parse(@NotNull String input) { + if (input.length() % 2 != 0) throw new IllegalArgumentException("invalid replacement string " + input); + + var mapping = new HashMap>(); + for (int i = 0; i < input.length(); i += 2) { + char from = input.charAt(i); + char to = input.charAt(i + 1); + + var targets = mapping.computeIfAbsent(from, k -> new ArrayList<>()); + 
targets.add(to); + } + + var result = new CharMap(); + mapping.forEach((key, value) -> result.put(key, CollectionUtils.toCharArray(value))); + return new Replacements(result); + } +} diff --git a/src/main/java/fi/evident/raudikko/internal/suggestions/SpellChecker.java b/src/main/java/fi/evident/raudikko/internal/suggestions/SpellChecker.java new file mode 100644 index 0000000..256067d --- /dev/null +++ b/src/main/java/fi/evident/raudikko/internal/suggestions/SpellChecker.java @@ -0,0 +1,138 @@ +/* + * The contents of this file are subject to the Mozilla Public License Version + * 2.0 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * https://www.mozilla.org/en-US/MPL/2.0/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Libvoikko: Library of natural language processing tools. + * The Initial Developer of the Original Code is Harri Pitkänen . + * Portions created by the Initial Developer are Copyright (C) 2012 + * the Initial Developer. All Rights Reserved. + * + * Raudikko, the Java port of the Initial Code is Copyright (C) 2020 by + * Evident Solutions Oy. All Rights Reserved. + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. 
If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + */ + +package fi.evident.raudikko.internal.suggestions; + +import fi.evident.raudikko.Analysis; +import fi.evident.raudikko.Analyzer; +import fi.evident.raudikko.analysis.Structure; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; + +import static java.lang.Math.min; + +/** + * A spell-checker that uses Analyzer to handle the spell-checking. + */ +final class SpellChecker { + + private final @NotNull Analyzer analyzer; + + public SpellChecker(@NotNull Analyzer analyzer) { + this.analyzer = analyzer; + } + + public @Nullable WordWithPriority spellCheck(@NotNull String word) { + var analyses = analyzer.analyze(word); + WordWithPriority best = null; + + for (var analysis : analyses) { + var result = createResult(word, analysis); + if (result != null && (best == null || result.priority() < best.priority())) + best = result; + } + + return best; + } + + private static @Nullable WordWithPriority createResult(@NotNull String word, @NotNull Analysis analysis) { + var structure = analysis.getStructure(); + if (structure == null) return null; + + var cr = CapitalizationResult.resolve(word, structure); + var priority = priorityFromWordClassAndInflection(analysis) * priorityFromStructure(structure) * cr.priority(); + + return new WordWithPriority(cr == CapitalizationResult.OK ? 
word : structure.apply(word), priority); + } + + private static int priorityFromStructure(@NotNull Structure structure) { + return 1 << (3 * (min(structure.getMorphemeCount(), 5) - 1)); + } + + private static int priorityFromWordClassAndInflection(@NotNull Analysis analysis) { + var wordClass = analysis.getWordClass(); + if (wordClass == null) + return 4; + + return switch (wordClass) { + case NOUN, ADJECTIVE, NOUN_ADJECTIVE, PRONOUN, FIRST_NAME, LAST_NAME, TOPONYM, PROPER_NOUN -> + priorityFromNounInflection(analysis); + default -> 4; // other word classes have no special handling yet + }; + } + + private static int priorityFromNounInflection(@NotNull Analysis analysis) { + var locative = analysis.getLocative(); + if (locative == null) + return 4; + + return switch (locative) { + case NOMINATIVE -> 2; + case GENITIVE -> 3; + case PARTITIVE -> 5; + case INESIVE, ILLATIVE -> 8; + case ELATIVE, ADESSIVE -> 12; + case ALLATIVE, ESSIVE, TRANSLATIVE, INSTRUCTIVE -> 20; + case ABLATIVE -> 30; + case ABESSIVE, COMITATIVE -> 60; + default -> 4; + }; + } + + private enum CapitalizationResult { + OK, FIRST_CAPITALIZED, CAPITALIZATION_ERROR; + + int priority() { + return ordinal() + 1; + } + + static @NotNull CapitalizationResult resolve(@NotNull String word, @NotNull Structure structure) { + var result = OK; + var it = structure.nonMorphemes(); + + for (int i = 0; i < word.length(); i++) { + if (!it.hasNext()) + break; + + var expected = it.next(); + if (!expected.agrees(word.charAt(i))) { + if (i == 0 && expected.isUpperCase()) + result = CapitalizationResult.FIRST_CAPITALIZED; + else + return CapitalizationResult.CAPITALIZATION_ERROR; + } + } + + return result; + } + } +} diff --git a/src/main/java/fi/evident/raudikko/internal/suggestions/Suggestion.java b/src/main/java/fi/evident/raudikko/internal/suggestions/Suggestion.java new file mode 100644 index 0000000..af950d1 --- /dev/null +++ b/src/main/java/fi/evident/raudikko/internal/suggestions/Suggestion.java @@ -0,0 +1,44 
@@ +/* + * The contents of this file are subject to the Mozilla Public License Version + * 2.0 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * https://www.mozilla.org/en-US/MPL/2.0/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Libvoikko: Library of natural language processing tools. + * The Initial Developer of the Original Code is Harri Pitkänen . + * Portions created by the Initial Developer are Copyright (C) 2012 + * the Initial Developer. All Rights Reserved. + * + * Raudikko, the Java port of the Initial Code is Copyright (C) 2020 by + * Evident Solutions Oy. All Rights Reserved. + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. 
+ */ + +package fi.evident.raudikko.internal.suggestions; + +import org.jetbrains.annotations.NotNull; + +/** + * A candidate produced by a suggestion generator: either a single word or the input split into two words. + */ +public sealed interface Suggestion permits Suggestion.SimpleSuggestion, Suggestion.SplitSuggestion { + + /** + * A candidate consisting of a single word. + */ + record SimpleSuggestion(@NotNull String word) implements Suggestion { + } + + /** + * A candidate consisting of two words. DefaultSpellingSuggester multiplies the summed + * priorities of the two words by {@code priorityMultiplier} when it ranks the split. + */ + record SplitSuggestion(@NotNull String word1, @NotNull String word2, int priorityMultiplier) implements Suggestion { + } +} diff --git a/src/main/java/fi/evident/raudikko/internal/suggestions/SuggestionGenerators.java b/src/main/java/fi/evident/raudikko/internal/suggestions/SuggestionGenerators.java new file mode 100644 index 0000000..177b078 --- /dev/null +++ b/src/main/java/fi/evident/raudikko/internal/suggestions/SuggestionGenerators.java @@ -0,0 +1,220 @@ +/* + * The contents of this file are subject to the Mozilla Public License Version + * 2.0 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * https://www.mozilla.org/en-US/MPL/2.0/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Libvoikko: Library of natural language processing tools. + * The Initial Developer of the Original Code is Harri Pitkänen . + * Portions created by the Initial Developer are Copyright (C) 2012 + * the Initial Developer. All Rights Reserved. + * + * Raudikko, the Java port of the Initial Code is Copyright (C) 2020 by + * Evident Solutions Oy. All Rights Reserved. + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above.
If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + */ + +package fi.evident.raudikko.internal.suggestions; + +import fi.evident.raudikko.internal.suggestions.Suggestion.SplitSuggestion; +import fi.evident.raudikko.internal.utils.StringUtils; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; + +import java.util.ArrayList; +import java.util.stream.IntStream; +import java.util.stream.Stream; + +import static fi.evident.raudikko.internal.utils.CharUtils.*; +import static fi.evident.raudikko.internal.utils.StringUtils.*; +import static java.lang.Math.min; + +final class SuggestionGenerators { + + /** + * Generates suggestions by deleting each character from the input word. + * A character is not deleted if it is the same as the previous character. + */ + static @NotNull Stream delete(@NotNull String word) { + return IntStream.range(0, word.length()) + .filter(i -> i == 0 || !equalsIgnoreCase(word.charAt(i), word.charAt(i - 1))) + .mapToObj(i -> removeRange(word, i, i + 1)); + } + + /** + * Generates suggestions by removing duplicate pairs of characters. + */ + static @NotNull Stream deleteTwo(@NotNull String word) { + if (word.length() < 6) + return Stream.empty(); + + return IntStream.range(0, word.length() - 3) + .filter(i -> word.regionMatches(i, word, i + 2, 2)) + .mapToObj(i -> removeRange(word, i, i + 2)); + } + + /** + * Generates a variant without soft hyphens. 
+ */ + static @NotNull Stream removeSoftHyphens(@NotNull String word) { + var withoutHyphen = word.replace("\u00AD", ""); + + return withoutHyphen.equals(word) ? Stream.empty() : Stream.of(withoutHyphen); + } + + /** + * Generates suggestions by trying to apply each of the given replacements to each of the possible characters. + */ + static @NotNull Stream replace(@NotNull String word, @NotNull Replacements replacements) { + var result = Stream.builder(); + + for (var i = 0; i < word.length(); i++) + for (var to : replacements.forCharacter(word.charAt(i))) + result.add(replaceCharAt(word, i, to)); + + return result.build(); + } + + /** + * Generates suggestions where two a pair of same characters are replaced by pair of + * other characters using given replacement mappings. + */ + static @NotNull Stream replaceTwo(@NotNull String word, @NotNull Replacements replacements) { + var s = word.toLowerCase(); + var result = Stream.builder(); + + for (int i = 1; i < s.length(); i++) { + var ch = s.charAt(i); + if (ch == s.charAt(i - 1)) { + for (char to : replacements.forCharacter(ch)) + result.add(replaceTwoChars(s, i - 1, to)); + i++; + } + } + return result.build(); + } + + /** + * Generates suggestions by inserting given characters into the string. + * Will not insert a character next to an existing instance of it. + */ + static @NotNull Stream insertion(@NotNull String word, @NotNull String insertedChars) { + return insertedChars.chars().mapToObj(c -> insertions(word, (char) c)).flatMap(s -> s); + } + + private static @NotNull Stream insertions(@NotNull String word, char insertionChar) { + return IntStream.rangeClosed(0, word.length()) + .filter(i -> !containsAdjacentCharacterIgnoringCase(word, i, insertionChar)) + .mapToObj(i -> word.substring(0, i) + insertionChar + word.substring(i)); + } + + /** + * Generates suggestions by inserting a hyphen into various places. + * Hyphen is never inserted near an existing hyphen or near beginning or end. 
+ */ + @NotNull + static Stream insertHyphen(@NotNull String word) { + return IntStream.range(2, word.length() - 1) + .filter(i -> !containsInSubstring(word, i - 2, i + 2, '-')) + .mapToObj(i -> word.substring(0, i) + '-' + word.substring(i)); + } + + /** + * Generates suggestions by duplicating existing characters in a word. + */ + @NotNull + static Stream duplicateCharacters(@NotNull String word) { + return IntStream.range(0, word.length()) + .filter(i -> { + char c = word.charAt(i); + return (i == 0 || word.charAt(i - 1) != c) + && (i + 1 >= word.length() || word.charAt(i + 1) != c) + && c != '-' && c != '\''; + }) + .mapToObj(i -> word.substring(0, i) + word.charAt(i) + word.substring(i)); + } + + /** + * Generates suggestions by swapping characters with nearby characters. + */ + static @NotNull Stream swap(@NotNull String word) { + var maxDistance = (word.length() <= 8) ? word.length() : (50 / word.length()); + if (maxDistance == 0) + return Stream.empty(); + + return IntStream.range(0, word.length()) + .mapToObj(i -> swapOne(word, maxDistance, i)) + .flatMap(s -> s); + } + + private static @NotNull Stream swapOne(@NotNull String word, int maxDistance, int i) { + return IntStream.range(i + 1, min(i + maxDistance + 1, word.length())) + .filter(j -> !equalsIgnoreCase(word.charAt(i), word.charAt(j)) && !isFrontOrBackVowel(word.charAt(i))) + .mapToObj(j -> StringUtils.swap(word, i, j)); + } + + /** + * Generates suggestions by converting front-vowels to back-vowels and vice versa. 
+ */ + static @NotNull Stream vowelChange(@NotNull String word) { + var frontOrBackVowelIndices = new ArrayList(word.length()); + for (int i = 0, len = word.length(); i < len; i++) + if (isFrontOrBackVowel(word.charAt(i))) + frontOrBackVowelIndices.add(i); + + if (frontOrBackVowelIndices.size() == 0 || frontOrBackVowelIndices.size() > 7) + return Stream.empty(); + + return IntStream.range(1, 1 << frontOrBackVowelIndices.size()).mapToObj(mask -> { + var chars = word.toCharArray(); + + for (int j = 0; j < frontOrBackVowelIndices.size(); j++) { + int i = frontOrBackVowelIndices.get(j); + + if ((mask & (1 << j)) != 0) + chars[i] = convertVowelBetweenFrontAndBack(chars[i]); + } + + return new String(chars); + }); + } + + /** + * Returns suggestions by trying to split the word at various points. + */ + static @NotNull Stream splitWord(@NotNull String word) { + var lower = word.toLowerCase(); + return IntStream.range(2, lower.length() - 2) + .mapToObj(i -> splitWordAt(lower, lower.length() - i)) + .flatMap(Stream::ofNullable); + } + + private static @Nullable SplitSuggestion splitWordAt(@NotNull String word, int i) { + // Don't split if there's a nearby hyphen + if (word.charAt(i - 2) == '-' || word.charAt(i - 1) == '-' || word.charAt(i + 1) == '-') + return null; + + var hyphen = word.charAt(i) == '-'; + var word1 = word.substring(0, i); + var word2 = word.substring(i + (hyphen ? 1 : 0)); + var priorityMultiplier = hyphen ? 
6 : 1; + + return new SplitSuggestion(word1, word2, priorityMultiplier); + } +} diff --git a/src/main/java/fi/evident/raudikko/internal/suggestions/WordWithPriority.java b/src/main/java/fi/evident/raudikko/internal/suggestions/WordWithPriority.java new file mode 100644 index 0000000..b67e3bb --- /dev/null +++ b/src/main/java/fi/evident/raudikko/internal/suggestions/WordWithPriority.java @@ -0,0 +1,38 @@ +/* + * The contents of this file are subject to the Mozilla Public License Version + * 2.0 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * https://www.mozilla.org/en-US/MPL/2.0/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Libvoikko: Library of natural language processing tools. + * The Initial Developer of the Original Code is Harri Pitkänen . + * Portions created by the Initial Developer are Copyright (C) 2012 + * the Initial Developer. All Rights Reserved. + * + * Raudikko, the Java port of the Initial Code is Copyright (C) 2020 by + * Evident Solutions Oy. All Rights Reserved. + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. 
If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + */ + +package fi.evident.raudikko.internal.suggestions; + +import org.jetbrains.annotations.NotNull; + +record WordWithPriority(@NotNull String word, int priority) { +} diff --git a/src/main/java/fi/evident/raudikko/internal/utils/CharMap.java b/src/main/java/fi/evident/raudikko/internal/utils/CharMap.java index e83407c..0838436 100644 --- a/src/main/java/fi/evident/raudikko/internal/utils/CharMap.java +++ b/src/main/java/fi/evident/raudikko/internal/utils/CharMap.java @@ -32,11 +32,16 @@ package fi.evident.raudikko.internal.utils; +import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; +import java.util.ArrayList; +import java.util.Collection; import java.util.HashMap; import java.util.Map; +import static java.util.Collections.unmodifiableCollection; + public final class CharMap { private final Object[] low = new Object[256]; @@ -62,4 +67,28 @@ else if (high != null) else return null; } + + public @NotNull T getOrDefault(char key, @NotNull T defaultValue) { + var result = get(key); + return result != null ? result : defaultValue; + } + + public @NotNull Collection keys() { + var result = new ArrayList(); + for (int i = 0; i < low.length; i++) + if (low[i] != null) + result.add((char) i); + + if (high != null) + result.addAll(high.keySet()); + + return unmodifiableCollection(result); + } + + public @NotNull CharMap copy() { + var result = new CharMap(); + System.arraycopy(low, 0, result.low, 0, low.length); + result.high = high != null ? 
new HashMap<>(high) : null; + return result; + } } diff --git a/src/main/java/fi/evident/raudikko/internal/utils/CharUtils.java b/src/main/java/fi/evident/raudikko/internal/utils/CharUtils.java new file mode 100644 index 0000000..26168d0 --- /dev/null +++ b/src/main/java/fi/evident/raudikko/internal/utils/CharUtils.java @@ -0,0 +1,69 @@ +/* + * The contents of this file are subject to the Mozilla Public License Version + * 2.0 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * https://www.mozilla.org/en-US/MPL/2.0/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Libvoikko: Library of natural language processing tools. + * The Initial Developer of the Original Code is Harri Pitkänen . + * Portions created by the Initial Developer are Copyright (C) 2012 + * the Initial Developer. All Rights Reserved. + * + * Raudikko, the Java port of the Initial Code is Copyright (C) 2020 by + * Evident Solutions Oy. All Rights Reserved. + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. 
If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + */ + +package fi.evident.raudikko.internal.utils; + +import org.jetbrains.annotations.NotNull; + +import static fi.evident.raudikko.internal.utils.StringUtils.contains; +import static java.lang.Character.toLowerCase; + +public final class CharUtils { + + private static final @NotNull String FRONT_AND_BACK_VOWELS = "aouAOUäöyÄÖY"; + + public static char convertVowelBetweenFrontAndBack(char c) { + return switch (c) { + case 'a' -> 'ä'; + case 'o' -> 'ö'; + case 'u' -> 'y'; + case 'A' -> 'Ä'; + case 'O' -> 'Ö'; + case 'U' -> 'Y'; + case 'ä' -> 'a'; + case 'ö' -> 'o'; + case 'y' -> 'u'; + case 'Ä' -> 'A'; + case 'Ö' -> 'O'; + case 'Y' -> 'U'; + default -> c; + }; + } + + public static boolean isFrontOrBackVowel(char ch) { + return contains(FRONT_AND_BACK_VOWELS, ch); + } + + public static boolean equalsIgnoreCase(char c1, char c2) { + return c1 == c2 || toLowerCase(c1) == toLowerCase(c2); + } +} diff --git a/src/main/java/fi/evident/raudikko/internal/utils/CollectionUtils.java b/src/main/java/fi/evident/raudikko/internal/utils/CollectionUtils.java index 1da3589..136fd6e 100644 --- a/src/main/java/fi/evident/raudikko/internal/utils/CollectionUtils.java +++ b/src/main/java/fi/evident/raudikko/internal/utils/CollectionUtils.java @@ -46,4 +46,18 @@ public static int count(@NotNull List xs, @NotNull T x) { return count; } + + public static char[] toCharArray(@NotNull List xs) { + char[] ys = new char[xs.size()]; + for (int i = 0; i < xs.size(); i++) + ys[i] = xs.get(i); + return ys; + } + + public static char[] toUpperCase(char @NotNull [] xs) { + char[] ys = new char[xs.length]; + for (int i = 0; i < xs.length; i++) + ys[i] = Character.toUpperCase(xs[i]); + return ys; + } } diff --git a/src/main/java/fi/evident/raudikko/internal/utils/StringUtils.java 
b/src/main/java/fi/evident/raudikko/internal/utils/StringUtils.java index c1b8383..c181b69 100644 --- a/src/main/java/fi/evident/raudikko/internal/utils/StringUtils.java +++ b/src/main/java/fi/evident/raudikko/internal/utils/StringUtils.java @@ -34,20 +34,16 @@ import org.jetbrains.annotations.NotNull; -import static java.lang.Character.toUpperCase; +import java.util.stream.IntStream; + +import static fi.evident.raudikko.internal.utils.CharUtils.equalsIgnoreCase; +import static java.lang.Character.*; public final class StringUtils { private StringUtils() { } - public static @NotNull String replaceCharAt(@NotNull String s, int i, char c) { - if (s.charAt(i) == c) return s; - var chars = s.toCharArray(); - chars[i] = c; - return new String(chars); - } - public static @NotNull String withoutChar(@NotNull CharSequence s, char removed) { var sb = new StringBuilder(s.length()); for (int i = 0, len = s.length(); i < len; i++) { @@ -89,19 +85,32 @@ public static boolean isAllLower(@NotNull CharSequence s) { return s.chars().noneMatch(Character::isUpperCase); } + public static @NotNull IntStream charIndices(@NotNull String word, char ch) { + IntStream.Builder result = IntStream.builder(); + + for (int i = word.indexOf(ch); i != -1; i = word.indexOf(ch, i + 1)) + result.add(i); + + return result.build(); + } + public static @NotNull String capitalize(@NotNull String s) { - if (s.isEmpty()) return s; + if (s.isEmpty() || isUpperCase(s.charAt(0))) return s; return toUpperCase(s.charAt(0)) + s.substring(1); } + public static @NotNull String decapitalize(@NotNull String s) { + if (s.isEmpty() || isLowerCase(s.charAt(0))) return s; + + return toLowerCase(s.charAt(0)) + s.substring(1); + } + public static @NotNull String removeRange(@NotNull String s, int startIndex, int endIndex) { if (endIndex < startIndex) throw new IndexOutOfBoundsException(); - else if (endIndex == startIndex) - return s; - else - return s.substring(0, startIndex) + s.substring(endIndex); + + return endIndex 
== startIndex ? s : s.substring(0, startIndex) + s.substring(endIndex); } public static boolean contains(@NotNull CharSequence s, char c) { @@ -113,7 +122,7 @@ public static int indexOf(@NotNull CharSequence s, char c) { } public static int indexOf(@NotNull CharSequence s, char c, int fromIndex) { - for (int i = fromIndex, n = s.length(); i < n; i++) + for (int i = fromIndex, len = s.length(); i < len; i++) if (s.charAt(i) == c) return i; return -1; @@ -123,13 +132,42 @@ public static boolean matchesAt(@NotNull CharSequence haystack, int offset, @Not if (offset < 0 || offset + needle.length() > haystack.length()) return false; - for (int i = 0; i < needle.length(); i++) + for (int i = 0, len = needle.length(); i < len; i++) if (haystack.charAt(i + offset) != needle.charAt(i)) return false; return true; } + public static @NotNull String swap(@NotNull String s, int i, int j) { + var chars = s.toCharArray(); + chars[i] = s.charAt(j); + chars[j] = s.charAt(i); + return new String(chars); + } + + public static @NotNull String replaceCharAt(@NotNull String s, int i, char ch) { + if (s.charAt(i) == ch) return s; + return s.substring(0, i) + ch + s.substring(i + 1); + } + + public static @NotNull String replaceTwoChars(@NotNull String word, int i, char to) { + var chars = word.toCharArray(); + chars[i] = to; + chars[i + 1] = to; + return new String(chars); + } + + public static boolean containsAdjacentCharacterIgnoringCase(@NotNull String s, int i, char ch) { + return (i > 0 && equalsIgnoreCase(ch, s.charAt(i - 1))) + || (i < s.length() && equalsIgnoreCase(ch, s.charAt(i))); + } + + public static boolean containsInSubstring(@NotNull String word, int start, int end, char ch) { + int i = word.indexOf(ch, start); + return i != -1 && i < end; + } + public static @NotNull String removeLeadingAndTrailing(@NotNull String s, char c) { if (s.isEmpty() || s.length() == 1 && s.charAt(0) == c) return ""; diff --git 
a/src/test/java/fi/evident/raudikko/integration/SpellingSuggesterTest.java b/src/test/java/fi/evident/raudikko/integration/SpellingSuggesterTest.java new file mode 100644 index 0000000..4b712b8 --- /dev/null +++ b/src/test/java/fi/evident/raudikko/integration/SpellingSuggesterTest.java @@ -0,0 +1,93 @@ +/* + * The contents of this file are subject to the Mozilla Public License Version + * 2.0 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * https://www.mozilla.org/en-US/MPL/2.0/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Libvoikko: Library of natural language processing tools. + * The Initial Developer of the Original Code is Harri Pitkänen . + * Portions created by the Initial Developer are Copyright (C) 2012 + * the Initial Developer. All Rights Reserved. + * + * Raudikko, the Java port of the Initial Code is Copyright (C) 2020 by + * Evident Solutions Oy. All Rights Reserved. + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. 
If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + */ + +package fi.evident.raudikko.integration; + +import fi.evident.raudikko.Morphology; +import fi.evident.raudikko.SpellingSuggester; +import org.jetbrains.annotations.NotNull; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; +import org.opentest4j.TestAbortedException; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.regex.Pattern; + +import static fi.evident.raudikko.test.ResourceUtils.readLines; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.fail; +import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; + +@TestInstance(PER_CLASS) +class SpellingSuggesterTest { + + private static final @NotNull Pattern TEST_PATTERN = Pattern.compile("(.+):\\[(.*)]"); + private SpellingSuggester suggester; + + @BeforeAll + void setup() { + suggester = Morphology.loadBundled().newSpellingSuggester(); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("testData") + void testSuggestions(@NotNull String line) { + var ignored = line.startsWith("#"); + if (ignored) + line = line.substring(1); + + var m = TEST_PATTERN.matcher(line); + if (!m.matches()) + fail("Invalid line '" + line + "'"); + + var word = m.group(1); + var suggestions = suggester.provideSpellingSuggestions(word); + var expected = Arrays.stream(m.group(2).split(";")).filter(s -> !s.isEmpty()).toList(); + + if (!ignored) { + assertEquals(expected, suggestions, "word: " + word); + } else { + if (expected.equals(suggestions)) + fail("PASSED test for ignored word " + word); + else + throw new TestAbortedException("ignored word " + word); + } + } + + private static @NotNull List testData() throws 
IOException { + return readLines("typing-error-suggester-test.txt"); + } +} diff --git a/src/test/java/fi/evident/raudikko/internal/suggestions/SpellCheckerTest.java b/src/test/java/fi/evident/raudikko/internal/suggestions/SpellCheckerTest.java new file mode 100644 index 0000000..be48fd2 --- /dev/null +++ b/src/test/java/fi/evident/raudikko/internal/suggestions/SpellCheckerTest.java @@ -0,0 +1,72 @@ +/* + * The contents of this file are subject to the Mozilla Public License Version + * 2.0 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * https://www.mozilla.org/en-US/MPL/2.0/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Libvoikko: Library of natural language processing tools. + * The Initial Developer of the Original Code is Harri Pitkänen . + * Portions created by the Initial Developer are Copyright (C) 2012 + * the Initial Developer. All Rights Reserved. + * + * Raudikko, the Java port of the Initial Code is Copyright (C) 2020 by + * Evident Solutions Oy. All Rights Reserved. + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. 
If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + */ + +package fi.evident.raudikko.internal.suggestions; + +import fi.evident.raudikko.Morphology; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; + +import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; + +@TestInstance(PER_CLASS) +class SpellCheckerTest { + + private SpellChecker spellChecker; + + @BeforeAll + void setup() { + var morphology = Morphology.loadBundled(); + spellChecker = new SpellChecker(morphology.newAnalyzer()); + } + + @Test + void unknownWord() { + assertNull(spellChecker.spellCheck("an-english-phrase-is-not-a-finnish-word")); + } + + @Test + void knownWordCapitalizedCorrectly() { + var result = spellChecker.spellCheck("kissa"); + assertNotNull(result); + assertEquals("kissa", result.word()); + } + + @Test + void knownWordCapitalizedIncorrectly() { + var result = spellChecker.spellCheck("helsinki"); + assertNotNull(result); + assertEquals("Helsinki", result.word()); + } +} diff --git a/src/test/java/fi/evident/raudikko/internal/suggestions/SuggestionGeneratorsTest.java b/src/test/java/fi/evident/raudikko/internal/suggestions/SuggestionGeneratorsTest.java new file mode 100644 index 0000000..2dc95e1 --- /dev/null +++ b/src/test/java/fi/evident/raudikko/internal/suggestions/SuggestionGeneratorsTest.java @@ -0,0 +1,258 @@ +/* + * The contents of this file are subject to the Mozilla Public License Version + * 2.0 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * https://www.mozilla.org/en-US/MPL/2.0/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Libvoikko: Library of natural language processing tools. + * The Initial Developer of the Original Code is Harri Pitkänen . + * Portions created by the Initial Developer are Copyright (C) 2012 + * the Initial Developer. All Rights Reserved. + * + * Raudikko, the Java port of the Initial Code is Copyright (C) 2020 by + * Evident Solutions Oy. All Rights Reserved. + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. 
+ */ + +package fi.evident.raudikko.internal.suggestions; + +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; + +import java.util.List; + +import static fi.evident.raudikko.internal.suggestions.Replacements.REPLACEMENTS_1; +import static fi.evident.raudikko.internal.suggestions.Replacements.REPLACEMENTS_1_FULL; +import static fi.evident.raudikko.internal.suggestions.SuggestionGenerators.*; +import static java.util.Arrays.asList; +import static java.util.Collections.emptyList; +import static java.util.Collections.singletonList; +import static org.junit.jupiter.api.Assertions.assertEquals; + +@SuppressWarnings("SpellCheckingInspection") +class SuggestionGeneratorsTest { + + @Nested + class Delete { + + @Test + void suggestions() { + assertEquals(emptyList(), delete("").toList()); + assertEquals(asList("ello", "hllo", "helo", "hell"), delete("hello").toList()); + } + } + + @Nested + class DeleteTwo { + + @Test + void wordsUnderLimit() { + assertEquals(emptyList(), deleteTwo("").toList()); + assertEquals(emptyList(), deleteTwo("hello").toList()); + assertEquals(emptyList(), deleteTwo("aaaaa").toList()); + } + + @Test + void noConsecutiveDuplicatePairs() { + assertEquals(emptyList(), deleteTwo("abcdefgh").toList()); + assertEquals(emptyList(), deleteTwo("abcdeeefgh").toList()); + } + + @Test + void consecutiveDuplicatePairs() { + assertEquals(asList("abccdeeeef", "abccccdeef"), deleteTwo("abccccdeeeef").toList()); + } + + @Test + void nonConsecutiveDuplicatePairs() { + assertEquals(emptyList(), deleteTwo("aabbaa").toList()); + } + } + + @Nested + class RemoveSoftHyphens { + + @Test + void noSoftHyphens() { + assertEquals(emptyList(), removeSoftHyphens("").toList()); + assertEquals(emptyList(), removeSoftHyphens("foo").toList()); + } + + @Test + void softHyphensAreRemoved() { + assertEquals(singletonList("foobar"), removeSoftHyphens("foo\u00ADbar").toList()); + assertEquals(singletonList("foobarbaz"), 
removeSoftHyphens("foo\u00ADbar\u00ADbaz").toList()); + } + } + + + @Nested + class Replace { + + @Test + void suggestions() { + assertEquals(List.of( + "batsaneläkeruokaa", "vstsaneläkeruokaa", "vetsaneläkeruokaa", "varsaneläkeruokaa", "vadsaneläkeruokaa", + "vaysaneläkeruokaa", "vatšaneläkeruokaa", "vataaneläkeruokaa", "vatssneläkeruokaa", "vatseneläkeruokaa", + "vatsameläkeruokaa", "vatsanrläkeruokaa", "vatsanaläkeruokaa", "vatsanekäkeruokaa", "vatsanelökeruokaa", + "vatsaneläleruokaa", "vatsanelägeruokaa", "vatsaneläkrruokaa", "vatsaneläkaruokaa", "vatsaneläkeeuokaa", + "vatsaneläketuokaa", "vatsaneläkeriokaa", "vatsaneläkeruikaa", "vatsaneläkeruolaa", "vatsaneläkeruogaa", + "vatsaneläkeruoksa", "vatsaneläkeruokea", "vatsaneläkeruokas", "vatsaneläkeruokae" + ), replace("vatsaneläkeruokaa", REPLACEMENTS_1_FULL).toList()); + } + + @Test + void upperCaseReplacements() { + assertEquals(List.of("Goo", "Doo", "Fio", "Foi"), replace("Foo", REPLACEMENTS_1_FULL).toList()); + } + } + + @Nested + class ReplaceTwo { + + @Test + void suggestions() { + assertEquals(emptyList(), replaceTwo("", REPLACEMENTS_1).toList()); + assertEquals(emptyList(), replaceTwo("bar", REPLACEMENTS_1).toList()); + assertEquals(asList("fiibarbazquux", "foobarbazqiix"), replaceTwo("foobarbazquux", REPLACEMENTS_1).toList()); + assertEquals(asList("fiibarbazquux", "foobarbazqiix"), replaceTwo("foobarbazquux", REPLACEMENTS_1).toList()); + } + + @Test + void multipleConsecutiveAreProcessedJustOnce() { + assertEquals(asList("iioooo", "ooiioo", "ooooii"), replaceTwo("oooooo", REPLACEMENTS_1).toList()); + } + } + + @Nested + class Insertion { + + @Test + void suggestions() { + assertEquals(List.of( + "1foobar", "f1oobar", "fo1obar", "foo1bar", "foob1ar", "fooba1r", "foobar1", + "2foobar", "f2oobar", "fo2obar", "foo2bar", "foob2ar", "fooba2r", "foobar2", + "3foobar", "f3oobar", "fo3obar", "foo3bar", "foob3ar", "fooba3r", "foobar3" + ), insertion("foobar", "123").toList()); + } + + @Test + void 
suggestionsWhenInsertionCharsAreInInput() { + assertEquals(List.of( + "1foo12bar", "f1oo12bar", "fo1o12bar", "foo121bar", "foo12b1ar", "foo12ba1r", "foo12bar1", + "2foo12bar", "f2oo12bar", "fo2o12bar", "foo212bar", "foo12b2ar", "foo12ba2r", "foo12bar2", + "3foo12bar", "f3oo12bar", "fo3o12bar", "foo312bar", "foo132bar", "foo123bar", "foo12b3ar", "foo12ba3r", "foo12bar3" + ), insertion("foo12bar", "123").toList()); + } + } + + @Nested + class InsertHyphen { + + @Test + void testGenerateWithEmptyString() { + assertEquals(emptyList(), insertHyphen("").toList()); + } + + @Test + void testGenerateWithStringWithoutHyphen() { + assertEquals(List.of("ab-cdefgh", "abc-defgh", "abcd-efgh", "abcde-fgh", "abcdef-gh"), insertHyphen("abcdefgh").toList()); + } + + @Test + void stringWithHyphen() { + assertEquals(List.of("ab-cd-efgh", "abcd-ef-gh"), insertHyphen("abcd-efgh").toList()); + } + + @Test + void testGenerateWithStringWithSpecialCharacters() { + assertEquals(emptyList(), insertHyphen("a-b-'c").toList()); + } + + } + + @Nested + class DuplicateCharacters { + + @Test + void testGenerateWithEmptyString() { + assertEquals(emptyList(), duplicateCharacters("").toList()); + } + + @Test + void testGenerateWithStringWithoutHyphen() { + assertEquals(List.of("aabcdefgh", "abbcdefgh", "abccdefgh", "abcddefgh", "abcdeefgh", "abcdeffgh", "abcdefggh", "abcdefghh"), + duplicateCharacters("abcdefgh").toList()); + } + + @Test + void stringWithHyphen() { + assertEquals(List.of("aabcd-efgh", "abbcd-efgh", "abccd-efgh", "abcdd-efgh", "abcd-eefgh", "abcd-effgh", "abcd-efggh", "abcd-efghh"), + duplicateCharacters("abcd-efgh").toList()); + } + + @Test + void testGenerateWithStringWithSpecialCharacters() { + assertEquals(List.of("aa-b-'c", "a-bb-'c", "a-b-'cc"), duplicateCharacters("a-b-'c").toList()); + } + } + + @Nested + class Swap { + + @Test + void suggestions() { + assertEquals(asList( + "ofobarbaz", "oofbarbaz", "boofarbaz", "aoobfrbaz", "roobafbaz", + "fooabrbaz", "foorabbaz", 
"fooaarbbz", "foozarbab", "foobabraz", + "foobaabrz", "foobazbar", "foobarabz", "foobarzab" + ), swap("foobarbaz").toList()); + } + } + + @Nested + class VowelChange { + + @Test + void suggestions() { + assertEquals( + asList("hamähäkki", "hämahäkki", "hamahäkki", "hämähakki", "hamähakki", "hämahakki", "hamahakki"), + vowelChange("hämähäkki").toList()); + + assertEquals( + asList( + "äamuyö", "aämuyö", "äämuyö", "aamyyö", "äamyyö", "aämyyö", "äämyyö", "aamuuö", + "äamuuö", "aämuuö", "äämuuö", "aamyuö", "äamyuö", "aämyuö", "äämyuö", "aamuyo", + "äamuyo", "aämuyo", "äämuyo", "aamyyo", "äamyyo", "aämyyo", "äämyyo", "aamuuo", + "äamuuo", "aämuuo", "äämuuo", "aamyuo", "äamyuo", "aämyuo", "äämyuo"), + vowelChange("aamuyö").toList()); + + assertEquals(asList("öy", "ou", "öu"), vowelChange("oy").toList()); + } + } + + @Nested + class SplitWord { + + @Test + void suggestions() { + assertEquals(asList("foobarb az", "foobar baz", "fooba rbaz", "foob arbaz", "foo barbaz"), splitWord("foobarbaz").map(s -> s.word1() + " " + s.word2()).toList()); + } + } +} diff --git a/src/test/java/fi/evident/raudikko/internal/utils/CharUtilsTest.java b/src/test/java/fi/evident/raudikko/internal/utils/CharUtilsTest.java new file mode 100644 index 0000000..cf1cc92 --- /dev/null +++ b/src/test/java/fi/evident/raudikko/internal/utils/CharUtilsTest.java @@ -0,0 +1,78 @@ +/* + * The contents of this file are subject to the Mozilla Public License Version + * 2.0 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * https://www.mozilla.org/en-US/MPL/2.0/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Libvoikko: Library of natural language processing tools. 
+ * The Initial Developer of the Original Code is Harri Pitkänen . + * Portions created by the Initial Developer are Copyright (C) 2012 + * the Initial Developer. All Rights Reserved. + * + * Raudikko, the Java port of the Initial Code is Copyright (C) 2020 by + * Evident Solutions Oy. All Rights Reserved. + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. 
+ */ + +package fi.evident.raudikko.internal.utils; + +import org.junit.jupiter.api.Test; + +import static fi.evident.raudikko.internal.utils.CharUtils.convertVowelBetweenFrontAndBack; +import static fi.evident.raudikko.internal.utils.CharUtils.equalsIgnoreCase; +import static org.junit.jupiter.api.Assertions.*; + +class CharUtilsTest { + + @Test + void testConvertBackVowelsToFront() { + assertEquals('ä', convertVowelBetweenFrontAndBack('a')); + assertEquals('ö', convertVowelBetweenFrontAndBack('o')); + assertEquals('y', convertVowelBetweenFrontAndBack('u')); + assertEquals('Ä', convertVowelBetweenFrontAndBack('A')); + assertEquals('Ö', convertVowelBetweenFrontAndBack('O')); + assertEquals('Y', convertVowelBetweenFrontAndBack('U')); + } + + @Test + void testConvertFrontVowelsToBack() { + assertEquals('a', convertVowelBetweenFrontAndBack('ä')); + assertEquals('o', convertVowelBetweenFrontAndBack('ö')); + assertEquals('u', convertVowelBetweenFrontAndBack('y')); + assertEquals('A', convertVowelBetweenFrontAndBack('Ä')); + assertEquals('O', convertVowelBetweenFrontAndBack('Ö')); + assertEquals('U', convertVowelBetweenFrontAndBack('Y')); + } + + @Test + void testNonVowelCharacters() { + assertEquals('b', convertVowelBetweenFrontAndBack('b')); + assertEquals('C', convertVowelBetweenFrontAndBack('C')); + assertEquals('1', convertVowelBetweenFrontAndBack('1')); + assertEquals('@', convertVowelBetweenFrontAndBack('@')); + } + + + @Test + void testEqualsIgnoreCase() { + assertTrue(equalsIgnoreCase('a', 'a')); + assertTrue(equalsIgnoreCase('A', 'a')); + assertFalse(equalsIgnoreCase('A', 'b')); + } +} diff --git a/src/test/java/fi/evident/raudikko/internal/utils/StringUtilsTest.java b/src/test/java/fi/evident/raudikko/internal/utils/StringUtilsTest.java index 9aad3bd..dd78bf2 100644 --- a/src/test/java/fi/evident/raudikko/internal/utils/StringUtilsTest.java +++ b/src/test/java/fi/evident/raudikko/internal/utils/StringUtilsTest.java @@ -34,14 +34,13 @@ import 
org.junit.jupiter.api.Test; -import static fi.evident.raudikko.internal.utils.StringUtils.matchesAt; -import static org.junit.jupiter.api.Assertions.assertFalse; -import static org.junit.jupiter.api.Assertions.assertTrue; +import static fi.evident.raudikko.internal.utils.StringUtils.*; +import static org.junit.jupiter.api.Assertions.*; class StringUtilsTest { @Test - void verifyMatchesAt() { + void testMatchesAt() { assertTrue(matchesAt("foo", 0, "foo")); assertTrue(matchesAt("foobar", 0, "foo")); assertFalse(matchesAt("foobar", 1, "foo")); @@ -53,4 +52,136 @@ void verifyMatchesAt() { assertFalse(matchesAt("foobar", -1, "ofoo")); } + + @Test + void testWithoutChar() { + assertEquals("helloworld", withoutChar("hello world", ' ')); + assertEquals("hello world", withoutChar("hello world", 'x')); + } + + @Test + void testCountOccurrences() { + assertEquals(3, countOccurrences("hello world", 'l')); + assertEquals(0, countOccurrences("hello world", 'x')); + } + + @Test + void testEndsWithChar() { + assertTrue(endsWithChar("hello", 'o')); + assertFalse(endsWithChar("hello", 'l')); + } + + @Test + void testStartsWithChar() { + assertTrue(startsWithChar("hello", 'h')); + assertFalse(startsWithChar("hello", 'e')); + } + + @Test + void testCapitalizeIfLower() { + assertEquals("Abc", capitalizeIfLower("abc")); + assertEquals("aBc", capitalizeIfLower("aBc")); + } + + @Test + void testIsAllUpper() { + assertTrue(isAllUpper("HELLO")); + assertFalse(isAllUpper("Hello")); + } + + @Test + void testIsAllLower() { + assertTrue(isAllLower("hello")); + assertFalse(isAllLower("Hello")); + } + + @Test + void testDecapitalize() { + assertEquals("hello", decapitalize("Hello")); + assertEquals("hello", decapitalize("hello")); + } + + @Test + void testRemoveRange() { + assertEquals("hello world", removeRange("hello cruel world", 6, 12)); + assertEquals("hello world", removeRange("hello world", 6, 6)); + } + + @Test + void testContains() { + assertTrue(contains("hello world", 'h')); + 
assertFalse(contains("hello world", 'x')); + } + + @Test + void testIndexOf() { + assertEquals(0, indexOf("hello world", 'h')); + assertEquals(0, indexOf("hello world", 'h', 0)); + assertEquals(-1, indexOf("hello world", 'h', 1)); + assertEquals(-1, indexOf("hello world", 'h', 1)); + + assertEquals(4, indexOf("hello world", 'o')); + assertEquals(4, indexOf("hello world", 'o', 4)); + assertEquals(7, indexOf("hello world", 'o', 5)); + assertEquals(7, indexOf("hello world", 'o', 6)); + assertEquals(7, indexOf("hello world", 'o', 7)); + assertEquals(-1, indexOf("hello world", 'o', 8)); + + assertEquals(-1, indexOf("hello world", 'x')); + assertEquals(-1, indexOf("hello world", 'x', 4)); + } + + @Test + void testSwap() { + assertEquals("hlelow", swap("hellow", 1, 2)); + assertEquals("hellow", swap("hellow", 2, 2)); + } + + @Test + void testReplaceTwoChars() { + assertEquals("xx", replaceTwoChars("ab", 0, 'x')); + assertEquals("helloxxorld", replaceTwoChars("hello world", 5, 'x')); + } + + @Test + void testContainsInSubstring() { + assertTrue(containsInSubstring("hello world", 0, 5, 'e')); + + assertFalse(containsInSubstring("hello world", 0, 5, 'w')); + assertFalse(containsInSubstring("hello world", 1, 5, 'h')); + } + + @Test + void testContainsAdjacentCharacter() { + assertTrue(containsAdjacentCharacterIgnoringCase("hello", 0, 'h')); + assertTrue(containsAdjacentCharacterIgnoringCase("hello", 1, 'h')); + assertTrue(containsAdjacentCharacterIgnoringCase("hello", 1, 'e')); + assertTrue(containsAdjacentCharacterIgnoringCase("hello", 2, 'e')); + assertTrue(containsAdjacentCharacterIgnoringCase("hello", 2, 'l')); + assertTrue(containsAdjacentCharacterIgnoringCase("hello", 3, 'l')); + assertTrue(containsAdjacentCharacterIgnoringCase("hello", 4, 'l')); + assertTrue(containsAdjacentCharacterIgnoringCase("hello", 4, 'o')); + assertTrue(containsAdjacentCharacterIgnoringCase("hello", 5, 'o')); + + assertFalse(containsAdjacentCharacterIgnoringCase("hello", 0, 'e')); + 
assertFalse(containsAdjacentCharacterIgnoringCase("hello", 1, 'o')); + assertFalse(containsAdjacentCharacterIgnoringCase("hello", 1, 'x')); + assertFalse(containsAdjacentCharacterIgnoringCase("hello", 5, 'x')); + + assertTrue(containsAdjacentCharacterIgnoringCase("hello", 1, 'E')); + } + + @Test + void testReplaceCharAt() { + assertEquals("hallo", replaceCharAt("hello", 1, 'a')); + assertEquals("hello", replaceCharAt("hello", 1, 'e')); + } + + @Test + void testCharIndices() { + assertArrayEquals(new int[]{0}, charIndices("foo", 'f').toArray()); + assertArrayEquals(new int[]{1,2}, charIndices("foo", 'o').toArray()); + assertArrayEquals(new int[]{2, 3, 9}, charIndices("hello world", 'l').toArray()); + assertArrayEquals(new int[]{}, charIndices("hello world", 'x').toArray()); + } } diff --git a/src/test/resources/typing-error-suggester-test.txt b/src/test/resources/typing-error-suggester-test.txt new file mode 100644 index 0000000..a69f4ad --- /dev/null +++ b/src/test/resources/typing-error-suggester-test.txt @@ -0,0 +1,227 @@ +Asuessadn:[Asuessaan] +ulkomaibla:[ulkomaila] +Suomec:[] +kansalaismn:[] +yenkilö-:[henkilö-] +ma:[mA;mä;me;maa;oma] +osoitatiedot:[osoita tiedot;osoitetiedot] +sgilyvät:[] +ajantasaisinm:[ajantasaisin] +Submessa:[] +vagn,:[] +uos:[uros;uis;jos;ulos] +häf:[] +vtse:[tse] +huolehtzi:[huolehti] +meuttuneiden:[] +tiesojensa:[riesojensa;tirsojensa;sietojensa;tierojensa;tiesomensa] +ilmomttamisesta:[] +Suomees.:[] +Ilmoimus:[] +mucttuneista:[] +tieyoista:[tietoista;tieroista;tiejoista] +(nm.:[] +avioliitto,:[avioliitto] +avioeto,:[] +lasteb:[lasten] +synthmä):[] +lghetetään:[] +iigi-:[III-] +jp:[jo;p;j;jpg;IP] +väestötoetoviraston,:[] +Pietarwaaren:[Pietarsaaren] +toimipaikkjan,:[] +Pb:[B;Pub;P;pH] +26s:[26] +68k01:[6801] +Pietarsxari,:[] +aai:[aasi;aari;sai;ai;AA] +lähimpäkn:[lähimpäin] +Sunmen:[] +edustuotoon:[edustuototon;edustuoton;edusruotoon;edustotoon;edustuottoon] +nykyqsessä:[] +asuinmaassax:[asuinmaassa] +Klselyjä:[] 
+henkilöbietomuutoksista:[] +vii:[vii;voi;viti;Vik;viis] +lähetgää:[lähetkää;lähettää] +syhköpostiosoitteeseen::[] +internationai@dvv.fi:[] +Osoitemuutoeset:[] +stn:[sen;snt;sun] +sijaab:[sijaan;sijaa;sija ab;sija-ab] +homdetaan:[hohdetaan] +Ulkomailga:[Ulkomaila;Ulkomailta] +#asuian:[asian;asuin;asujan;astian;asuihan] +Suomev:[] +kansalaisgn:[] +ilmoxtus:[] +osoitteenmuutoksebta:[] +llomakkeella:[lomakkeella] +tar:[tae;taru;taro] +lomtke.fin:[] +sähköiseklä:[sähköisellä] +lomakkxella.:[] +Kcn:[] +tijdot:[] +väestötietojärhestelmässä:[väestötietojärjestelmässä] +ovdt:[ovet;ovat] +akan:[akan;ekan;akana;alan;takan] +tmsalla,:[] +oc:[c;o;pc;OK] +esimerkiksh:[] +yassin:[tassin;Hassin] +saamiren:[saamien;saamisen] +nopenmpaa.:[] +Asuwessaan:[Asuessaan] +ualkomailla:[ulkomailla;halkomailla;valkomailla] +Suomepn:[Suomen;Suomein;Suomepin] +kanesalaisen:[kansalaisen;kanasalaisen;kannesalaisen;aknesalaisen;sanekalaisen] +hsenkilö-:[henkilö-] +dja:[dia;oja;ja;d:ja;aja] +osoiwtetiedot:[osoitetiedot] +säjilyvät:[säilyvät] +#ajantasailsina:[ajantassilaina;ajantasaisina;ajantasalisina] +Suomeissa:[Suomissa;Suomessa;Suoneissa;Suomiessa;Someissa] +vaine,:[] +jhos:[jos] +hpän:[hän;hään] +yitse:[itse;ylitse] +huoljehtii:[huolehtii] +muuttunehiden:[muuttuneiden] +tinetojensa:[tietojensa] +ilmoittamnisesta:[ilmoittamisesta] +Suomeben.:[] +Iljmoitus:[Ilmoitus] +mduuttuneista:[muuttuneista] +#tiedoiista:[tiedoista;tiedioista] +s(mm.:[] +avioliittio,:[] +avuioero,:[] +lasften:[lasten] +dsyntymä):[] +läheteatään:[lähetetään] +Digwi-:[Digi-] +jda:[ja;Ida;jaa] +väestötietovirastoon,:[väestötietovirastoon] +Pietasrsaaren:[Pietarsaaren] +toigmipaikkaan,:[] +PbL:[] +26x,:[] +FwI-:[FI-] +68g601:[68601] +Pietarsaaqri,:[] +tadi:[stadi;tai] +lähiempään:[lähempään;lähimpään;lähiemiään;lähiemään;lähiemopään] +Suwomen:[Suomen] +eduwstustoon:[edustustoon] +nykybisessä:[nykyisessä;nykybissessä] +asuinmaassak.:[] +Kyszelyjä:[Kyselyjä] +xhenkilötietomuutoksista:[henkilötietomuutoksista] +vozi:[voi] 
+lähettyää:[lähettypä;lähettää;lähettyä;lähettyään;lähettyäpä] +sähköpostioscoitteeseen::[] +internationgal@dvv.fi:[] +Osoitemuutokpset:[Osoitemuutokset] +saen:[säen;sane;sen;saan;saken] +sinjaan:[sijaan;Sonjaan;Senjaan] +hsoidetaan:[hoidetaan] +Ulkomyailla:[Ulkomailla] +asduvan:[astuvan;asuvan] +Suomten:[Suomen;Suonten;Suomeen] +kaensalaisen:[kansalaisen;käensalaisen;käensäläisen;laensalaisen] +ilmyoitus:[ilmoitus] +osoitwteenmuutoksesta:[osoitteenmuutoksesta] +-lomakkevella:[-lomakkeella] +taui:[tau;tauti;tausi;tui;tauni] +llomake.fin:[] +sähköiseqllä:[sähköisellä] +lomaskkeella.:[] +Kgun:[Kun] +tiledot:[tiedot] +väestötietojälrjestelmässä:[väestötietojärjestelmässä] +ocvat:[ovat] +atjan:[ajan;vatjan;Anjat;Arjan;patjan] +tasaplla,:[] +ofn:[on] +esdimerkiksi:[esimerkiksi] +#pasusin:[passin;paussin;pusasin;pasurin;Pa susin] +slaaminen:[saaminen;salaaminen;silaaminen;selaaminen] +nopeaumpaa.:[] +suessaan:[Suessaan;asuessaan;osuessaan;suassaan;suvessaan] +ulkomaill:[ulkomalli;ulkomailla;ulkomaille] +Suoen:[Suen;Suomen;Suo en;Suon;Suonen] +kansalasen:[kansalaisen;kansalaen;kansalasien;kansalasein;kansalasten] +henkilö:[henkilö;henkikö;henkilöt;henkilöi;henkilön] +j:[] +osoietiedot:[osoitetiedot] +#säilyät:[säilät;säilymät;säilyvät;säilyt;säilyjät] +ajantaaisina:[ajantamaisina;ajantasaisina;ajantanaisina;ajantakaisina;ajantapaisina] +#Suomssa:[Suomissa;Suomessa;Suomassa;Suossa;Sumossa] +vain:[vain;van;avain;vaon;vein] +os:[oas;osa;s;o;ois] +#hä:[hän;ha;ä;h;he] +tse:[tse;tae;se;te;tase] +huoletii:[huoleti;huolehtii;huolet ii;huolitie] +muuttneiden:[muuttaneiden;muuttuneiden] +tetojensa:[tietojensa;tetrojensa;fetojensa;Terojensa;Teojensa] +ilmoittamiseta:[ilmoittamista;ilmoittamiset;ilmoittamisetta;ilmoittamisesta;ilmoittamiselta] +Suomen.:[Suomen] +lmoitus:[ilmoitus;lomitus] +muuttneista:[muuttaneista;muuttuneista] +tiedista:[tiedusta;tiedosta;tiedoista;tiediasta] +mm.:[mm.;mm;kk.] 
+aviolitto,:[] +aviero,:[] +laten:[latan;Laden;laen;latten;lasten] +synymä):[] +läheetään:[lähetään;lähetetään;lähdetään] +Digi:[] +j:[] +väestötietoiraston,:[] +Pietasaaren:[Pietarsaaren;Pirtasaaren;Pietisaaren] +timipaikkaan,:[] +L:[] +6,:[6] +F-:[F-;NF-;MF-;G-;PF-] +6860:[6860;860;660;680;686] +Pietarsaar,:[] +ai:[ai;aie;ei;i;a] +ähimpään:[lähimpään;vähimpään] +Sumen:[Suen;Suomen;Suman;Sumein;Sumean] +edutustoon:[edustustoon] +nykyisesä:[nykyisensä;nykyisessä;nykyisestä;nykyiskesä;nykyispesä] +asuinmaassa:[asuinmaassa;asuinmassa;asuin maassa;astuinmaassa;Asunmaassa] +Kyelyjä:[Kylyjä;Kyselyjä] +enkilötietomuutoksista:[henkilötietomuutoksista] +oi:[oi;ii;oo;i;koi] +lähetää:[lähetä;lähettää;lähentää;lähetään;lähetkää] +sähköpostiosotteeseen::[] +nternational@dvv.fi:[] +Osotemuutokset:[Ositemuutokset;Osoitemuutokset;Odotemuutokset;Osaotemuutokset] +sn:[n;s;sen;San;snt] +sijan:[sijan;sija;sian;sijani;Aijan] +hoidetan:[hoidatan;hoideta;hoidetaan] +Ukomailla:[Ukonmailla;Ukkomailla;Sukomailla;Uskomailla;Ulkomailla] +auvan:[sauvan;aivan;asuvan;vauvan;hauvan] +Suoen:[Suen;Suomen;Suo en;Suon;Suonen] +kansaaisen:[kansalaisen;kansamaisen;kansanaisen] +ilmoitu:[ilmoitus;Ilmo itu] +osoitteenmuutosesta:[osoitteenkuutosesta;osoitteenmuutoseste;osoitteenmuutoksesta;osoitteenmuutosestoa] +-lomakeella:[-lomake Ella;-lomakkeella;-lomakehella;-lomake-Ella] +ti:[te;i;tie;t;tiu] +lomake.fn:[] +sähköisell:[sähköisellä;sähköiselle] +lomakeella.:[] +Ku:[U;Kuu;Aku;K;Kun] +#tieot:[teot;tiet;tieto;tiedot;lieot] +väestötietoärjestelmässä:[väestötietojärjestelmässä] +ova:[iva;oiva;nova;kova;ovi] +aan:[San;alan;saan;akan;aah] +tasall,:[] +o:[] +esierkiksi:[esimerkiksi;esiarkiksi;esieriksi;esirekiksi;esiherkiksi] +passi:[passi;pässi;paasi;apassi;Pessi] +saamine:[saamine;saameni;saamien;saaminen;saamina] +noeampaa.:[]