Home Assistant Unofficial Reference 2024.12.1
stt.py
Go to the documentation of this file.
1 """Support for Wyoming speech-to-text services."""
2 
3 from collections.abc import AsyncIterable
4 import logging
5 
6 from wyoming.asr import Transcribe, Transcript
7 from wyoming.audio import AudioChunk, AudioStart, AudioStop
8 from wyoming.client import AsyncTcpClient
9 
10 from homeassistant.components import stt
11 from homeassistant.config_entries import ConfigEntry
12 from homeassistant.core import HomeAssistant
13 from homeassistant.helpers.entity_platform import AddEntitiesCallback
14 
15 from .const import DOMAIN, SAMPLE_CHANNELS, SAMPLE_RATE, SAMPLE_WIDTH
16 from .data import WyomingService
17 from .error import WyomingError
18 from .models import DomainDataItem
19 
20 _LOGGER = logging.getLogger(__name__)
21 
22 
async def async_setup_entry(
    hass: HomeAssistant,
    config_entry: ConfigEntry,
    async_add_entities: AddEntitiesCallback,
) -> None:
    """Set up Wyoming speech-to-text.

    Looks up the ``DomainDataItem`` stored for this config entry under
    ``hass.data[DOMAIN]`` and registers a single ``WyomingSttProvider``
    backed by that item's Wyoming service.
    """
    item: DomainDataItem = hass.data[DOMAIN][config_entry.entry_id]
    async_add_entities(
        [
            WyomingSttProvider(config_entry, item.service),
        ]
    )
35 
36 
class WyomingSttProvider(stt.SpeechToTextEntity):
    """Wyoming speech-to-text provider.

    Streams audio to a Wyoming ASR service over TCP and returns the
    transcript it sends back.
    """

    def __init__(
        self,
        config_entry: ConfigEntry,
        service: WyomingService,
    ) -> None:
        """Set up provider.

        The first ASR service advertised in ``service.info`` is used. The
        supported languages are the union of the languages of every model
        that service reports as installed.
        """
        self.service = service
        asr_service = service.info.asr[0]

        # A set de-duplicates languages shared by several installed models.
        model_languages: set[str] = set()
        for asr_model in asr_service.models:
            if asr_model.installed:
                model_languages.update(asr_model.languages)

        self._supported_languages = list(model_languages)
        # Entity attributes: display name and a per-config-entry unique id.
        self._attr_name = asr_service.name
        self._attr_unique_id = f"{config_entry.entry_id}-stt"

    @property
    def supported_languages(self) -> list[str]:
        """Return a list of supported languages."""
        return self._supported_languages

    @property
    def supported_formats(self) -> list[stt.AudioFormats]:
        """Return a list of supported formats."""
        return [stt.AudioFormats.WAV]

    @property
    def supported_codecs(self) -> list[stt.AudioCodecs]:
        """Return a list of supported codecs."""
        return [stt.AudioCodecs.PCM]

    @property
    def supported_bit_rates(self) -> list[stt.AudioBitRates]:
        """Return a list of supported bitrates."""
        return [stt.AudioBitRates.BITRATE_16]

    @property
    def supported_sample_rates(self) -> list[stt.AudioSampleRates]:
        """Return a list of supported samplerates."""
        return [stt.AudioSampleRates.SAMPLERATE_16000]

    @property
    def supported_channels(self) -> list[stt.AudioChannels]:
        """Return a list of supported channels."""
        return [stt.AudioChannels.CHANNEL_MONO]

    async def async_process_audio_stream(
        self, metadata: stt.SpeechMetadata, stream: AsyncIterable[bytes]
    ) -> stt.SpeechResult:
        """Process an audio stream to STT service.

        Opens a TCP connection to the Wyoming service, sends a
        ``Transcribe`` request for ``metadata.language``, forwards every
        item of ``stream`` as an ``AudioChunk`` between ``AudioStart`` and
        ``AudioStop``, then reads events until a ``Transcript`` arrives.

        Returns a ``SpeechResult`` with the transcript text and ``SUCCESS``.
        Returns ``SpeechResult(None, ERROR)`` if the connection is lost
        before a transcript is received, or if an ``OSError`` or
        ``WyomingError`` is raised while talking to the service.
        """
        try:
            async with AsyncTcpClient(self.service.host, self.service.port) as client:
                # Set transcription language
                await client.write_event(Transcribe(language=metadata.language).event())

                # Begin audio stream
                await client.write_event(
                    AudioStart(
                        rate=SAMPLE_RATE,
                        width=SAMPLE_WIDTH,
                        channels=SAMPLE_CHANNELS,
                    ).event(),
                )

                async for audio_bytes in stream:
                    chunk = AudioChunk(
                        rate=SAMPLE_RATE,
                        width=SAMPLE_WIDTH,
                        channels=SAMPLE_CHANNELS,
                        audio=audio_bytes,
                    )
                    await client.write_event(chunk.event())

                # End audio stream
                await client.write_event(AudioStop().event())

                # Skip any non-transcript events until the result arrives.
                while True:
                    event = await client.read_event()
                    if event is None:
                        # read_event() yielding None is treated as a dropped
                        # connection.
                        _LOGGER.debug("Connection lost")
                        return stt.SpeechResult(None, stt.SpeechResultState.ERROR)

                    if Transcript.is_type(event.type):
                        transcript = Transcript.from_event(event)
                        text = transcript.text
                        break

        except (OSError, WyomingError):
            _LOGGER.exception("Error processing audio stream")
            return stt.SpeechResult(None, stt.SpeechResultState.ERROR)

        return stt.SpeechResult(
            text,
            stt.SpeechResultState.SUCCESS,
        )
list[stt.AudioSampleRates] supported_sample_rates(self)
Definition: stt.py:79
stt.SpeechResult async_process_audio_stream(self, stt.SpeechMetadata metadata, AsyncIterable[bytes] stream)
Definition: stt.py:90
list[stt.AudioCodecs] supported_codecs(self)
Definition: stt.py:69
list[stt.AudioChannels] supported_channels(self)
Definition: stt.py:84
list[stt.AudioBitRates] supported_bit_rates(self)
Definition: stt.py:74
None __init__(self, ConfigEntry config_entry, WyomingService service)
Definition: stt.py:44
list[stt.AudioFormats] supported_formats(self)
Definition: stt.py:64
None async_setup_entry(HomeAssistant hass, ConfigEntry config_entry, AddEntitiesCallback async_add_entities)
Definition: stt.py:27