Skip to content

Commit

Permalink
Fix morpheme backend info in README and docs
Browse files Browse the repository at this point in the history
  • Loading branch information
hyunwoongko committed Apr 30, 2024
1 parent 7478bc4 commit 34f0168
Show file tree
Hide file tree
Showing 11 changed files with 21 additions and 21 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ Split texts into sentences.
Args:
text (Union[str, List[str], Tuple[str]]): single text or list/tuple of texts
backend (str): morpheme analyzer backend. 'mecab', 'pecab', 'punct' are supported
backend (str): morpheme analyzer backend. 'mecab', 'pecab', 'punct', 'fast' are supported
num_workers (Union[int, str]): the number of multiprocessing workers
strip (bool): strip all sentences or not
return_morphemes (bool): whether to return morphemes or not
Expand Down Expand Up @@ -1306,7 +1306,7 @@ This splits texts into sentences.

Args:
- text (`Union[str, List[str], Tuple[str]]`): single text or list/tuple of texts
- backend (`str`): morpheme analyzer backend. 'mecab', 'pecab', 'punct' are supported
- backend (`str`): morpheme analyzer backend. 'mecab', 'pecab', 'punct', 'fast' are supported
- num_workers (`Union[int, str]`): the number of multiprocessing workers
- strip (`bool`): strip all sentences or not
- return_morphemes (`bool`): whether to return morphemes or not
Expand All @@ -1332,7 +1332,7 @@ This corrects the spacing of the text.

Args:
- text (`Union[str, List[str], Tuple[str]]`): single text or list/tuple of texts
- backend (`str`): morpheme analyzer backend. 'mecab', 'pecab', 'punct' are supported
- backend (`str`): morpheme analyzer backend. 'mecab', 'pecab' are supported
- num_workers (`Union[int, str]`): the number of multiprocessing workers
- reset_whitespaces (`bool`): reset whitespaces or not
- return_morphemes (`bool`): whether to return morphemes or not
Expand All @@ -1359,7 +1359,7 @@ This summarizes the given text, using TextRank algorithm.

Args:
- text (`Union[str, List[str], Tuple[str]]`): single text or list/tuple of texts
- backend (`str`): morpheme analyzer backend. 'mecab', 'pecab' are supported.
- backend (`str`): morpheme analyzer backend. 'mecab', 'pecab', 'punct', 'fast' are supported.
- num_workers (`Union[int, str]`): the number of multiprocessing workers
- max_sentences (`int`): the max number of sentences in a summarization result.
- tolerance (`float`): a threshold for omitting edge weights.
Expand Down
2 changes: 1 addition & 1 deletion kss/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,4 +201,4 @@ def _find_closest_module(module, min_distance=0.5):


__ALL__ = list(supported_modules.keys()) + ["Kss"]
__version__ = "6.0.3"
__version__ = "6.0.4"
4 changes: 2 additions & 2 deletions kss/_modules/augmentation/augment.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from kss._modules.augmentation.utils import correct_josa
from kss._utils.logger import highlight_diffs, logger
from kss._utils.multiprocessing import _run_job
from kss._utils.sanity_checks import _check_text, _check_type, _check_num_workers, _check_backend_mecab_pecab_only
from kss._utils.sanity_checks import _check_text, _check_type, _check_num_workers, _check_analyzer_backend_mecab_pecab_only


def augment(
Expand Down Expand Up @@ -52,7 +52,7 @@ def augment(
josa_correction = _check_type(josa_correction, "josa_correction", bool)
verbose = _check_type(verbose, "verbose", bool)
num_workers = _check_num_workers(text, num_workers)
_check_backend_mecab_pecab_only(backend)
_check_analyzer_backend_mecab_pecab_only(backend)

if num_workers is not False and verbose:
verbose = False
Expand Down
4 changes: 2 additions & 2 deletions kss/_modules/g2p/g2p.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
)
from kss._modules.jamo._jamo import h2j, j2h
from kss._utils.multiprocessing import _run_job
from kss._utils.sanity_checks import _check_text, _check_num_workers, _check_type, _check_backend_mecab_pecab_only
from kss._utils.sanity_checks import _check_text, _check_num_workers, _check_type, _check_analyzer_backend_mecab_pecab_only


def g2p(
Expand Down Expand Up @@ -89,7 +89,7 @@ def g2p(
"convert_numbers_to_hangul_phonemes", bool)
verbose = _check_type(verbose, "verbose", bool)
num_workers = _check_num_workers(text, num_workers)
_check_backend_mecab_pecab_only(backend)
_check_analyzer_backend_mecab_pecab_only(backend)

return _run_job(
func=partial(
Expand Down
4 changes: 2 additions & 2 deletions kss/_modules/keywords/extract_keywords.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import List, Union, Tuple

from kss._modules.keywords.utils import KRWordRank
from kss._utils.sanity_checks import _check_text, _check_type, _check_backend_mecab_pecab_only
from kss._utils.sanity_checks import _check_text, _check_type, _check_analyzer_backend_mecab_pecab_only


def extract_keywords(
Expand Down Expand Up @@ -67,7 +67,7 @@ def extract_keywords(
max_word_length = _check_type(max_word_length, "max_word_length", int)
return_scores = _check_type(return_scores, "return_scores", bool)
noun_only = _check_type(noun_only, "noun_only", bool)
_check_backend_mecab_pecab_only(backend)
_check_analyzer_backend_mecab_pecab_only(backend)

if num_workers != "auto":
raise ValueError("`extract_keywords` does not support `num_workers` argument")
Expand Down
4 changes: 2 additions & 2 deletions kss/_modules/morphemes/split_morphemes.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from kss._utils.multiprocessing import _run_job
from kss._utils.sanity_checks import (
_check_text,
_check_analyzer_backend,
_check_analyzer_backend_mecab_pecab_only,
_check_num_workers,
_check_type,
)
Expand Down Expand Up @@ -47,6 +47,6 @@ def split_morphemes(
return text

num_workers = _check_num_workers(text, num_workers)
backend = _check_analyzer_backend(backend)
backend = _check_analyzer_backend_mecab_pecab_only(backend)
result = _run_job(partial(backend.pos, drop_space=drop_space), text, num_workers)
return result
4 changes: 2 additions & 2 deletions kss/_modules/romanization/romanize.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from kss._modules.g2p.g2p import g2p
from kss._modules.romanization.utils import pronounce, Syllable
from kss._utils.multiprocessing import _run_job
from kss._utils.sanity_checks import _check_text, _check_type, _check_num_workers, _check_backend_mecab_pecab_only
from kss._utils.sanity_checks import _check_text, _check_type, _check_num_workers, _check_analyzer_backend_mecab_pecab_only

vowel = {
# 단모음 monophthongs
Expand Down Expand Up @@ -124,7 +124,7 @@ def romanize(
return text

use_morpheme_info = _check_type(use_morpheme_info, "use_morpheme_info", bool)
_check_backend_mecab_pecab_only(backend)
_check_analyzer_backend_mecab_pecab_only(backend)
convert_english_to_hangul_phonemes = _check_type(convert_english_to_hangul_phonemes,
"convert_english_to_hangul_phonemes", bool)
convert_numbers_to_hangul_phonemes = _check_type(convert_numbers_to_hangul_phonemes,
Expand Down
2 changes: 1 addition & 1 deletion kss/_modules/sentences/split_sentences.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def split_sentences(
Args:
text (Union[str, List[str], Tuple[str]]): single text or list/tuple of texts
backend (str): morpheme analyzer backend. 'mecab', 'pecab', 'punct' are supported
backend (str): morpheme analyzer backend. 'mecab', 'pecab', 'punct', 'fast' are supported
num_workers (Union[int, str]): the number of multiprocessing workers
strip (bool): strip all sentences or not
return_morphemes (bool): whether to return morphemes or not
Expand Down
6 changes: 3 additions & 3 deletions kss/_modules/spacing/correct_spacing.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from kss._modules.sentences.split_sentences import _split_sentences
from kss._modules.spacing.utils import postprocess, postprocess_heuristic
from kss._utils.multiprocessing import _run_job
from kss._utils.sanity_checks import _check_text, _check_backend_mecab_pecab_only, _check_num_workers
from kss._utils.sanity_checks import _check_text, _check_analyzer_backend_mecab_pecab_only, _check_num_workers

any_ws = re.compile(r"\s+")
space_insertable = r"(([^SUWX]|X[RS]|S[EH]).* ([NMI]|V[VAX]|VCN|XR|XPN|S[WLHN]))|(SN ([MI]|N[PR]|NN[GP]|V[VAX]|VCN|XR|XPN|S[WHN]))|((S[FPL]).* ([NMI]|V[VAX]|VCN|XR|XPN|S[WHN]))"
Expand Down Expand Up @@ -42,7 +42,7 @@ def correct_spacing(
Args:
text (Union[str, List[str], Tuple[str]]): single text or list/tuple of texts
backend (str): morpheme analyzer backend. 'mecab', 'pecab', 'punct' are supported
backend (str): morpheme analyzer backend. 'mecab', 'pecab' are supported
num_workers (Union[int, str]): the number of multiprocessing workers
reset_whitespaces (bool): reset whitespaces or not
return_morphemes (bool): whether to return morphemes or not
Expand All @@ -67,7 +67,7 @@ def correct_spacing(
return text

backend_string = backend
backend = _check_backend_mecab_pecab_only(backend)
backend = _check_analyzer_backend_mecab_pecab_only(backend)
_num_workers = _check_num_workers(text, num_workers)

return _run_job(
Expand Down
2 changes: 1 addition & 1 deletion kss/_modules/summarization/summarize_sentences.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def summarize_sentences(
Args:
text (Union[str, List[str], Tuple[str]]): single text or list/tuple of texts
backend (str): morpheme analyzer backend. 'mecab', 'pecab' are supported.
backend (str): morpheme analyzer backend. 'mecab', 'pecab', 'punct', 'fast' are supported.
num_workers (Union[int, str]): the number of multiprocessing workers
max_sentences (int): the max number of sentences in a summarization result.
tolerance (float): a threshold for omitting edge weights.
Expand Down
2 changes: 1 addition & 1 deletion kss/_utils/sanity_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ def _check_text(
return text, finish


def _check_backend_mecab_pecab_only(backend: str) -> Analyzer:
def _check_analyzer_backend_mecab_pecab_only(backend: str) -> Analyzer:
global MECAB_INFORM, KONLPY_MECAB_INFORM, PECAB_INFORM

if isinstance(backend, str):
Expand Down

0 comments on commit 34f0168

Please sign in to comment.