import ctypes
import logging
from queue import Queue
from typing import Optional, Any, Iterable, Iterator
import pyttsx3.drivers._espeak as espeak
from sttts.api.message import ModuleError, ModelError, ModelNotFoundError
from sttts.api.model import SynthesizerModel, Synthesizer
class EspeakSynthesizer(SynthesizerModel, Synthesizer):
    """
    Speech synthesizer using the `eSpeak <https://espeak.sourceforge.net/>`__ bindings from
    `pyttsx3 <https://github.com/nateshmbhat/pyttsx3>`__.
    Only the actual C library wrappers are directly used, bypassing the provided loop and ``ffmpeg``-based PCM output.

    As a dependency, the ``espeak`` (or at least ``libespeak1``) package usually needs to be installed beforehand,
    which usually also makes a wide range of languages (voices) available.

    Results are understandable but typically sound rather mechanical than natural by today's standards. However, it is
    a viable alternative that runs even on low-end hardware.
    """

    def __init__(self, *,
                 model_name: Optional[str] = None, model_path: Optional[str] = None, buffer_length: float = 0.25
                 ) -> None:
        """
        Initialize the espeak library, select the voice and register the synthesis callback.

        :param str model_name: Voice name, for example ``default`` or ``english-us``. Omitted to list available ones.
        :param str model_path: Directory which contains the espeak-data directory, omitted for default location.
        :param float buffer_length: Length in seconds of sound buffers that are passed to the callback (0.25).
        :raises ModelError: If the library reports an initialization failure.
        :raises ModelNotFoundError: If no voice name is given or the voice cannot be selected; the error carries the
            names and languages of all voices reported by the library.
        """
        self._logger: logging.Logger = logging.getLogger(self.__class__.__name__)
        # Hands audio buffers from the synthesis callback over to generate(); None marks the end of an utterance.
        self._q: Queue = Queue()

        data_path: Optional[bytes] = model_path.encode() if model_path is not None else None
        # The buffer length is passed in milliseconds. The return value is the sample rate (e.g. 22050) or -1.
        self._sample_rate: int = espeak.Initialize(espeak.AUDIO_OUTPUT_RETRIEVAL,
                                                   bufflength=round(buffer_length * 1000),
                                                   path=data_path)
        if self._sample_rate == -1:
            raise ModelError(self.__class__.__name__, model_name, 'Failed to initialize espeak')

        # A missing name is deliberately handled like an unknown one, so that the available voices get reported.
        if model_name is None or espeak.SetVoiceByName(model_name.encode()) != 0:
            raise ModelNotFoundError(self.__class__.__name__, model_name, None,
                                     [f"{v.name.decode()} ({v.languages.decode()})" for v in espeak.ListVoices()])

        espeak.SetSynthCallback(self._on_synth)

    def __enter__(self) -> Synthesizer:
        """Log the currently selected voice and return this instance; setup already happened in ``__init__``."""
        self._logger.info("Entering %s", espeak.GetCurrentVoice())
        return self

    def __exit__(self, *args) -> None:
        """Nothing to release here, the instance stays usable after leaving the context."""
        pass

    def sample_rate(self) -> int:
        """Return the sample rate that was reported by the library upon initialization."""
        return self._sample_rate

    def generate(self, utterance: str) -> Iterator[bytes]:
        """
        Synthesize the given text and yield the resulting audio buffers as they are delivered by the callback.

        :param str utterance: Text to speak, passed UTF-8 encoded to the library.
        :return: Iterator over raw sample buffers, exhausted when the end-of-message marker has been received.
        :raises ModuleError: If the synthesis call is rejected by the library.
        """
        if espeak.Synth(utterance.encode('utf-8'), flags=espeak.ENDPAUSE | espeak.CHARS_UTF8) != 0:
            raise ModuleError(self.__class__.__name__, "espeak Synth call failed")

        finished: bool = False
        try:
            while True:
                buffer: Optional[bytes] = self._q.get()
                if buffer is None:  # end-of-utterance marker from _on_synth()
                    finished = True
                    return
                yield buffer
        finally:
            # A consumer that stops early (generator closed or failed) would otherwise leave the remaining buffers
            # and the end marker queued, and the next utterance would start with stale audio and end prematurely.
            if not finished:
                self._discard_pending()

    def _discard_pending(self) -> None:
        """Abort the running synthesis and drop everything that is still queued from it."""
        # NOTE(review): relies on espeak_Cancel() not invoking the callback anymore once it returned - confirm.
        if espeak.Cancel() != 0:
            self._logger.warning("espeak Cancel call failed")
        self._q = Queue()

    def _on_synth(self, wav: Any, num_samples: int, events: Iterable[espeak.EVENT]) -> int:
        """
        Synthesis callback registered with the library, forwarding audio and the end marker to the queue.

        Presumably called from a library thread, which is why results are passed via a queue - TODO confirm.

        :param wav: Pointer to the generated samples.
        :param int num_samples: Number of samples available at ``wav``, may be 0.
        :param events: Event array that ends with an ``EVENT_LIST_TERMINATED`` entry.
        :return: Always 0, which according to the espeak API continues the synthesis.
        """
        if num_samples > 0:
            # Copy the samples (of C short size each) out of the library-owned memory.
            self._q.put(ctypes.string_at(wav, num_samples * ctypes.sizeof(ctypes.c_short)))

        for event in events:
            if event.type == espeak.EVENT_LIST_TERMINATED:
                # Explicit list end, there is nothing else that would stop the iteration.
                break
            if event.type == espeak.EVENT_MSG_TERMINATED:
                self._q.put(None)  # tell generate() that the utterance is complete
        return 0