Skip to content

Commit

Permalink
Merge pull request #22 from ShoukanLabs/dev
Browse files Browse the repository at this point in the history
allow using legacy phonemisation where espeak dialect is needed
  • Loading branch information
korakoe authored Feb 21, 2025
2 parents 05a8b1c + cc43b86 commit c4eb99f
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 12 deletions.
11 changes: 8 additions & 3 deletions VoPho/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,22 @@ class Phonemizer:
A class for phonemizing text in multiple languages.
"""

def __init__(self, working_path=None, stress=False):
def __init__(self, working_path=None, stress=False, legacy=False, manual_fixes=None):
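"""
Initialize the Phonemizer.
:param working_path: Optional path for working directory
:param stress: Optional toggle for stress, for phonemisers that support it
:param legacy: Optional toggle for legacy phonemisation, passed through to the English phonemiser
:param manual_fixes: Optional manual fixes; an empty dict is used when omitted
"""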
if manual_fixes is None:
self.manual_fixes = {}
else:
self.manual_fixes = manual_fixes
self.working_path = working_path
self.stress = stress
self._phonemizers = {}
self.Tokenizer = Tokenizer()
self.legacy = legacy

def pretty_print(self, tokens: list[Token]):
"""
Expand Down Expand Up @@ -61,13 +66,13 @@ def get_phonemizer(self, lang):
"""
if lang not in self._phonemizers:
if lang == 'en':
self._phonemizers[lang] = english.Phonemizer(stress=self.stress)
self._phonemizers[lang] = english.Phonemizer(stress=self.stress, legacy=self.legacy)
elif lang == 'ja':
self._phonemizers[lang] = japanese.Phonemizer()
elif lang == 'zh':
self._phonemizers[lang] = mandarin.Phonemizer()
elif lang == 'cy': # cyrillic treated as russian
self._phonemizers[lang] = russian.Phonemizer(working_path=self.working_path)
self._phonemizers[lang] = russian.Phonemizer(working_path=self.working_path, stress=self.stress)
elif lang == 'th':
self._phonemizers[lang] = thai.Phonemizer()
return self._phonemizers.get(lang)
Expand Down
25 changes: 17 additions & 8 deletions VoPho/phonemizers/english.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,12 +291,16 @@ def __call__(self, token):

### BASE PHONEMISER CLASS
class Phonemizer:
def __init__(self, manual_fixes=None, allow_heteronyms=True, stress=False):
def __init__(self, manual_fixes=None, allow_heteronyms=True, stress=False, legacy=False):
self.legacy = legacy
if manual_fixes is None:
manual_fixes = manual_phonemizations
self.backend = OpenPhonemizer()
self.fallback = OpenPhonemiserFallback(backend=self.backend)
self.phonemizer = en.G2P(trf=True, british=False, fallback=self.fallback)
if not legacy:
self.backend = OpenPhonemizer()
self.fallback = OpenPhonemiserFallback(backend=self.backend)
self.phonemizer = en.G2P(trf=True, british=False, fallback=self.fallback)
else:
self.phonemizer = OpenPhonemizer()
self.manual_phonemizations = manual_fixes
self.allow_heteronyms = allow_heteronyms
self.stress = stress
Expand Down Expand Up @@ -338,9 +342,14 @@ def phonemize(self, text):
segments = self.phoneme_tag_pattern.split(preprocessed_text)

# Process each text segment (even indices) using the G2P model, or OpenPhonemizer directly in legacy mode
for i in range(0, len(segments), 2):
if segments[i]:
segments[i] = self.phonemizer(segments[i])[0]
if not self.legacy:
for i in range(0, len(segments), 2):
if segments[i]:
segments[i] = self.phonemizer(segments[i])[0]
else:
for i in range(0, len(segments), 2):
if segments[i]:
segments[i] = self.phonemizer(segments[i])

phonemized_text = ''.join(segments)

Expand All @@ -352,7 +361,7 @@ def phonemize(self, text):


if __name__ == "__main__":
phonem = Phonemizer(stress=True)
phonem = Phonemizer(stress=True, legacy=True)
test_text = "'two heads is better than one.', "
print(f"Original: {test_text}")
print(f"Phonemized: {phonem.phonemize(test_text)}")
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "VoPho"
version = "0.0.18"
version = "0.0.19"
description = "An easy to use Multilingual phonemization meta-library"
readme = "README.md"
authors = [
Expand Down

0 comments on commit c4eb99f

Please sign in to comment.