# Home Assistant Unofficial Reference 2024.12.1
# audio_enhancer.py
# Go to the documentation of this file.
"""Audio enhancement for Assist."""

from abc import ABC, abstractmethod
from dataclasses import dataclass
import logging

from pymicro_vad import MicroVad
from pyspeex_noise import AudioProcessor

from .const import BYTES_PER_CHUNK

# Module-level logger, named after this module.
_LOGGER = logging.getLogger(__name__)


15 @dataclass(frozen=True, slots=True)
17  """Enhanced audio chunk and metadata."""
18 
19  audio: bytes
20  """Raw PCM audio @ 16Khz with 16-bit mono samples"""
21 
22  timestamp_ms: int
23  """Timestamp relative to start of audio stream (milliseconds)"""
24 
25  speech_probability: float | None
26  """Probability that audio chunk contains speech (0-1), None if unknown"""
27 
28 
class AudioEnhancer(ABC):
    """Base class for audio enhancement."""

    def __init__(
        self, auto_gain: int, noise_suppression: int, is_vad_enabled: bool
    ) -> None:
        """Initialize audio enhancer.

        The three settings are stored unchanged on the instance as
        ``auto_gain``, ``noise_suppression`` and ``is_vad_enabled``.
        """
        self.auto_gain = auto_gain
        self.noise_suppression = noise_suppression
        self.is_vad_enabled = is_vad_enabled

    @abstractmethod
    def enhance_chunk(self, audio: bytes, timestamp_ms: int) -> EnhancedAudioChunk:
        """Enhance chunk of PCM audio @ 16Khz with 16-bit mono samples.

        Subclasses must implement this; the base class cannot be
        instantiated while the method is abstract.
        """


# NOTE(review): the class statement was missing from the extracted listing;
# the name and base class are restored from the upstream module -- confirm.
class MicroVadSpeexEnhancer(AudioEnhancer):
    """Audio enhancer that runs microVAD and speex."""

    def __init__(
        self, auto_gain: int, noise_suppression: int, is_vad_enabled: bool
    ) -> None:
        """Initialize audio enhancer.

        The speex processor is only created when the scaled auto gain or
        the scaled noise suppression is non-zero; the VAD is only created
        when ``is_vad_enabled`` is true. Otherwise the respective attribute
        stays ``None`` and that stage is skipped in ``enhance_chunk``.
        """
        super().__init__(auto_gain, noise_suppression, is_vad_enabled)

        self.audio_processor: AudioProcessor | None = None

        # Scale from 0-4
        # (overwrites the raw value stored by the base class)
        self.noise_suppression = noise_suppression * -15

        # Scale from 0-31
        # (overwrites the raw value stored by the base class)
        self.auto_gain = auto_gain * 300

        if (self.auto_gain != 0) or (self.noise_suppression != 0):
            self.audio_processor = AudioProcessor(
                self.auto_gain, self.noise_suppression
            )
            _LOGGER.debug(
                "Initialized speex with auto_gain=%s, noise_suppression=%s",
                self.auto_gain,
                self.noise_suppression,
            )

        self.vad: MicroVad | None = None

        if self.is_vad_enabled:
            self.vad = MicroVad()
            _LOGGER.debug("Initialized microVAD")

    def enhance_chunk(self, audio: bytes, timestamp_ms: int) -> EnhancedAudioChunk:
        """Enhance 10ms chunk of PCM audio @ 16Khz with 16-bit mono samples.

        ``audio`` must be exactly ``BYTES_PER_CHUNK`` bytes long (checked
        with an ``assert``). Returns a chunk carrying the processed audio
        (or the input audio when no speex processor is configured), the
        unchanged ``timestamp_ms``, and the VAD result (``None`` when the
        VAD is disabled).
        """
        speech_probability: float | None = None

        assert len(audio) == BYTES_PER_CHUNK

        if self.vad is not None:
            # Run VAD on the incoming audio, before it is passed to speex
            speech_probability = self.vad.Process10ms(audio)

        if self.audio_processor is not None:
            # Run noise suppression and auto gain
            audio = self.audio_processor.Process10ms(audio).audio

        return EnhancedAudioChunk(
            audio=audio,
            timestamp_ms=timestamp_ms,
            speech_probability=speech_probability,
        )
# Cross-reference index from the documentation page (method signatures):
#   EnhancedAudioChunk enhance_chunk(self, bytes audio, int timestamp_ms)
#   None __init__(self, int auto_gain, int noise_suppression, bool is_vad_enabled)
#   None __init__(self, int auto_gain, int noise_suppression, bool is_vad_enabled)
#   EnhancedAudioChunk enhance_chunk(self, bytes audio, int timestamp_ms)