Home Assistant Unofficial Reference 2024.12.1
stt.py
Go to the documentation of this file.
1 """Support for the Google Cloud STT service."""
2 
3 from __future__ import annotations
4 
5 from collections.abc import AsyncGenerator, AsyncIterable
6 import logging
7 
8 from google.api_core.exceptions import GoogleAPIError, Unauthenticated
9 from google.cloud import speech_v1
10 
11 from homeassistant.components.stt import (
12  AudioBitRates,
13  AudioChannels,
14  AudioCodecs,
15  AudioFormats,
16  AudioSampleRates,
17  SpeechMetadata,
18  SpeechResult,
19  SpeechResultState,
20  SpeechToTextEntity,
21 )
22 from homeassistant.config_entries import ConfigEntry
23 from homeassistant.core import HomeAssistant
24 from homeassistant.helpers import device_registry as dr
25 from homeassistant.helpers.entity_platform import AddEntitiesCallback
26 
27 from .const import (
28  CONF_SERVICE_ACCOUNT_INFO,
29  CONF_STT_MODEL,
30  DEFAULT_STT_MODEL,
31  DOMAIN,
32  STT_LANGUAGES,
33 )
34 
35 _LOGGER = logging.getLogger(__name__)
36 
37 
async def async_setup_entry(
    hass: HomeAssistant,
    config_entry: ConfigEntry,
    async_add_entities: AddEntitiesCallback,
) -> None:
    """Set up Google Cloud speech platform via config entry.

    Builds a ``SpeechAsyncClient`` from the service-account info stored in
    the config entry data and registers a single STT entity that uses it.

    Args:
        hass: The Home Assistant instance (not used directly here).
        config_entry: Entry whose ``data`` holds the service-account info.
        async_add_entities: Callback used to register the new entity.
    """
    service_account_info = config_entry.data[CONF_SERVICE_ACCOUNT_INFO]
    client = speech_v1.SpeechAsyncClient.from_service_account_info(
        service_account_info
    )
    async_add_entities([GoogleCloudSpeechToTextEntity(config_entry, client)])
47 
48 
class GoogleCloudSpeechToTextEntity(SpeechToTextEntity):
    """Google Cloud STT entity."""

    def __init__(
        self,
        entry: ConfigEntry,
        client: speech_v1.SpeechAsyncClient,
    ) -> None:
        """Init Google Cloud STT entity.

        Args:
            entry: Config entry this entity belongs to; supplies the unique
                id, display name, device identifiers and the STT model option.
            client: Async Google Cloud Speech client used for recognition.
        """
        self._attr_unique_id = f"{entry.entry_id}"
        self._attr_name = entry.title
        self._attr_device_info = dr.DeviceInfo(
            identifiers={(DOMAIN, entry.entry_id)},
            manufacturer="Google",
            model="Cloud",
            entry_type=dr.DeviceEntryType.SERVICE,
        )
        self._entry = entry
        self._client = client
        # Fall back to the default model when the option has not been set.
        self._model = entry.options.get(CONF_STT_MODEL, DEFAULT_STT_MODEL)

    @property
    def supported_languages(self) -> list[str]:
        """Return a list of supported languages."""
        return STT_LANGUAGES

    @property
    def supported_formats(self) -> list[AudioFormats]:
        """Return a list of supported formats."""
        return [AudioFormats.WAV, AudioFormats.OGG]

    @property
    def supported_codecs(self) -> list[AudioCodecs]:
        """Return a list of supported codecs."""
        return [AudioCodecs.PCM, AudioCodecs.OPUS]

    @property
    def supported_bit_rates(self) -> list[AudioBitRates]:
        """Return a list of supported bitrates."""
        return [AudioBitRates.BITRATE_16]

    @property
    def supported_sample_rates(self) -> list[AudioSampleRates]:
        """Return a list of supported samplerates."""
        return [AudioSampleRates.SAMPLERATE_16000]

    @property
    def supported_channels(self) -> list[AudioChannels]:
        """Return a list of supported channels."""
        return [AudioChannels.CHANNEL_MONO]

    async def async_process_audio_stream(
        self, metadata: SpeechMetadata, stream: AsyncIterable[bytes]
    ) -> SpeechResult:
        """Process an audio stream to STT service.

        Args:
            metadata: Describes the incoming audio; its codec, sample rate
                and language are forwarded in the recognition config.
            stream: Async iterable yielding chunks of audio bytes.

        Returns:
            A ``SpeechResult`` with the concatenated transcript and
            ``SpeechResultState.SUCCESS``, or ``SpeechResult(None, ERROR)``
            when the Google API call raises ``GoogleAPIError``.
        """
        # OPUS audio is sent as OGG_OPUS; every other codec as LINEAR16.
        if metadata.codec == AudioCodecs.OPUS:
            encoding = speech_v1.RecognitionConfig.AudioEncoding.OGG_OPUS
        else:
            encoding = speech_v1.RecognitionConfig.AudioEncoding.LINEAR16

        streaming_config = speech_v1.StreamingRecognitionConfig(
            config=speech_v1.RecognitionConfig(
                encoding=encoding,
                sample_rate_hertz=metadata.sample_rate,
                language_code=metadata.language,
                model=self._model,
            )
        )

        async def request_generator() -> (
            AsyncGenerator[speech_v1.StreamingRecognizeRequest]
        ):
            # The first request must only contain a streaming_config
            yield speech_v1.StreamingRecognizeRequest(streaming_config=streaming_config)
            # All subsequent requests must only contain audio_content
            async for audio_content in stream:
                yield speech_v1.StreamingRecognizeRequest(audio_content=audio_content)

        try:
            responses = await self._client.streaming_recognize(
                requests=request_generator(),
                timeout=10,
            )

            pieces: list[str] = []
            async for response in responses:
                _LOGGER.debug("response: %s", response)
                if not response.results:
                    continue
                # Only the first result's top alternative is used.
                result = response.results[0]
                if not result.alternatives:
                    continue
                pieces.append(result.alternatives[0].transcript)
        except GoogleAPIError as err:
            _LOGGER.error("Error occurred during Google Cloud STT call: %s", err)
            if isinstance(err, Unauthenticated):
                # Credentials were rejected: start the reauth flow.
                self._entry.async_start_reauth(self.hass)
            return SpeechResult(None, SpeechResultState.ERROR)

        return SpeechResult("".join(pieces), SpeechResultState.SUCCESS)
SpeechResult async_process_audio_stream(self, SpeechMetadata metadata, AsyncIterable[bytes] stream)
Definition: stt.py:102
None __init__(self, ConfigEntry entry, speech_v1.SpeechAsyncClient client)
Definition: stt.py:56
None async_setup_entry(HomeAssistant hass, ConfigEntry config_entry, AddEntitiesCallback async_add_entities)
Definition: stt.py:42